author    eb    2008-06-05 22:45:24 +0000
committer eb    2008-06-05 22:45:24 +0000
commit    852ba2210329b25db1b2fe2423cec30175088f1f (patch)
tree      41e56c30709757bb957b51c306560ab9c050ddb1
parent    30fde16beb9901189fa9546f95b26049df48c4c1 (diff)
refactored gc_jd_queue_data, faster mutex_unlock
git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@8558 221aa14e-8319-0410-a670-987f0aec2ac5
-rw-r--r--  gcell/src/include/gc_jd_queue_data.h          24
-rw-r--r--  gcell/src/lib/runtime/gc_jd_queue.c           36
-rw-r--r--  gcell/src/lib/runtime/spu/gc_spu_jd_queue.c   39
3 files changed, 70 insertions, 29 deletions
diff --git a/gcell/src/include/gc_jd_queue_data.h b/gcell/src/include/gc_jd_queue_data.h
index d48591bd2..e5fa87499 100644
--- a/gcell/src/include/gc_jd_queue_data.h
+++ b/gcell/src/include/gc_jd_queue_data.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -36,13 +36,31 @@ __GC_BEGIN_DECLS
*
* FIXME make it lock free ;) For now, use a spin lock.
*/
-typedef struct gc_jd_queue
+
+typedef struct gc_jd_q_links
{
gc_eaddr_t head _AL16;
gc_eaddr_t tail _AL16;
+} gc_jd_q_links_t;
+
+typedef struct gc_jd_q_mutex
+{
uint32_t mutex; // libsync mutex (spin lock)
-} gc_jd_queue_t;
+ uint32_t _pad[31]; // pad to cache line so we can use putlluc on SPE
+} _AL128 gc_jd_q_mutex_t;
+typedef struct gc_jd_q_flag
+{
+ uint32_t flag; // host writes this after enqueuing
+ uint32_t _pad[31]; // pad to cache line
+} _AL128 gc_jd_q_flag_t;
+
+typedef struct gc_jd_queue
+{
+ gc_jd_q_links_t l;
+ gc_jd_q_mutex_t m;
+ gc_jd_q_flag_t f;
+} _AL128 gc_jd_queue_t;
__GC_END_DECLS
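
Not part of the commit: a compile-time sanity sketch of the layout the new header is aiming for, assuming _AL16/_AL128 expand to 16- and 128-byte GCC alignment attributes and that gc_eaddr_t is a 64-bit effective address (the C11 _Static_assert lines are purely illustrative).

#include <stddef.h>
#include <stdint.h>

typedef uint64_t gc_eaddr_t;                 /* assumption: 64-bit effective address */

typedef struct gc_jd_q_links {
  gc_eaddr_t head __attribute__((aligned(16)));
  gc_eaddr_t tail __attribute__((aligned(16)));
} gc_jd_q_links_t;

typedef struct gc_jd_q_mutex {
  uint32_t mutex;                            /* libsync spin lock */
  uint32_t _pad[31];                         /* fill the 128-byte line for putlluc */
} __attribute__((aligned(128))) gc_jd_q_mutex_t;

typedef struct gc_jd_q_flag {
  uint32_t flag;                             /* host writes this after enqueuing */
  uint32_t _pad[31];                         /* pad to a full cache line */
} __attribute__((aligned(128))) gc_jd_q_flag_t;

typedef struct gc_jd_queue {
  gc_jd_q_links_t l;
  gc_jd_q_mutex_t m;
  gc_jd_q_flag_t  f;
} __attribute__((aligned(128))) gc_jd_queue_t;

/* Each piece the SPE touches atomically owns its own 128-byte cache line,
 * so ordinary mutex traffic cannot disturb a reservation on the flag line. */
_Static_assert(sizeof(gc_jd_q_mutex_t) == 128, "mutex fills one cache line");
_Static_assert(sizeof(gc_jd_q_flag_t)  == 128, "flag fills one cache line");
_Static_assert(offsetof(gc_jd_queue_t, m) % 128 == 0, "mutex starts a cache line");
_Static_assert(offsetof(gc_jd_queue_t, f) % 128 == 0, "flag starts a cache line");
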
diff --git a/gcell/src/lib/runtime/gc_jd_queue.c b/gcell/src/lib/runtime/gc_jd_queue.c
index b5cdcac9b..29b74c29d 100644
--- a/gcell/src/lib/runtime/gc_jd_queue.c
+++ b/gcell/src/lib/runtime/gc_jd_queue.c
@@ -28,9 +28,10 @@
void
gc_jd_queue_init(gc_jd_queue_t *q)
{
- _mutex_init(ptr_to_ea(&q->mutex));
- q->head = 0;
- q->tail = 0;
+ _mutex_init(ptr_to_ea(&q->m.mutex));
+ q->l.head = 0;
+ q->l.tail = 0;
+ q->f.flag = 0;
smp_wmb();
}
@@ -38,41 +39,44 @@ void
gc_jd_queue_enqueue(gc_jd_queue_t *q, gc_job_desc_t *item)
{
item->sys.next = 0;
- _mutex_lock(ptr_to_ea(&q->mutex));
+ _mutex_lock(ptr_to_ea(&q->m.mutex));
smp_rmb(); // import barrier
- if (q->tail == 0){ // currently empty
- q->tail = q->head = jdp_to_ea(item);
+ if (q->l.tail == 0){ // currently empty
+ q->l.tail = q->l.head = jdp_to_ea(item);
}
else { // not empty, append
- ea_to_jdp(q->tail)->sys.next = jdp_to_ea(item);
- q->tail = jdp_to_ea(item);
+ ea_to_jdp(q->l.tail)->sys.next = jdp_to_ea(item);
+ q->l.tail = jdp_to_ea(item);
}
smp_wmb(); // orders stores above before clearing of mutex
- _mutex_unlock(ptr_to_ea(&q->mutex));
+ _mutex_unlock(ptr_to_ea(&q->m.mutex));
+
+ // let SPEs know we wrote something if they've got a lock-line reservation
+ q->f.flag = 1;
}
gc_job_desc_t *
gc_jd_queue_dequeue(gc_jd_queue_t *q)
{
- _mutex_lock(ptr_to_ea(&q->mutex));
+ _mutex_lock(ptr_to_ea(&q->m.mutex));
smp_rmb(); // import barrier
- gc_eaddr_t item_ea = q->head;
+ gc_eaddr_t item_ea = q->l.head;
if (item_ea == 0){ // empty
- _mutex_unlock(ptr_to_ea(&q->mutex));
+ _mutex_unlock(ptr_to_ea(&q->m.mutex));
return 0;
}
- q->head = ea_to_jdp(item_ea)->sys.next;
- if (q->head == 0) // now emtpy
- q->tail = 0;
+ q->l.head = ea_to_jdp(item_ea)->sys.next;
+ if (q->l.head == 0) // now empty
+ q->l.tail = 0;
gc_job_desc_t *item = ea_to_jdp(item_ea);
item->sys.next = 0;
smp_wmb(); // orders stores above before clearing of mutex
- _mutex_unlock(ptr_to_ea(&q->mutex));
+ _mutex_unlock(ptr_to_ea(&q->m.mutex));
return item;
}
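
Not part of the commit: a host-side sketch of the intended calling pattern. submit_job() and the include path are illustrative assumptions; only gc_jd_queue_enqueue() comes from the code above.

#include "gc_jd_queue.h"          /* assumed gcell runtime header */

static void submit_job(gc_jd_queue_t *q, gc_job_desc_t *jd)
{
  /* gc_jd_queue_enqueue() links jd in under the spin lock, issues smp_wmb()
   * so the link stores are visible before the mutex clears, and finally
   * stores q->f.flag = 1.  Because the flag sits in its own 128-byte cache
   * line, only that deliberate store -- not ordinary mutex or head/tail
   * traffic -- knocks out an SPE's lock-line reservation and wakes it. */
  gc_jd_queue_enqueue(q, jd);
}
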
diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
index 22752fe68..0dd165fc0 100644
--- a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
+++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
@@ -25,11 +25,30 @@
extern int gc_sys_tag;
+/*
+ * ea must be 128-byte aligned, the mutex is in the first int32_t, and
+ * it must be safe to write the remaining 124 bytes with anything at
+ * all.
+ */
+static __inline void _fast_mutex_unlock(mutex_ea_t ea)
+{
+ char _tmp[256];
+ vector signed int *buf
+ = (vector signed int *) ALIGN(_tmp, 128); // get cache-aligned buffer
+
+ buf[0] = spu_splats(0); // the value that unlocks the mutex
+
+ mfc_putlluc(buf, ea, 0, 0); // unconditional put, no reservation reqd
+ spu_readch(MFC_RdAtomicStat);
+}
+
+
+
bool
gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
int jd_tag, gc_job_desc_t *item)
{
- gc_jd_queue_t local_q;
+ gc_jd_q_links_t local_q;
// Before acquiring the lock, see if it's possible that there's
// something in the queue. Checking in this way makes it easier
@@ -37,7 +56,7 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
// the lock unless there is something in the queue.
// copy in the queue structure
- mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
+ mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
mfc_read_tag_status_all(); // wait for DMA to complete
@@ -48,15 +67,15 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
// When we peeked, head was non-zero. Now grab the
// lock and do it for real.
- _mutex_lock(q + offsetof(gc_jd_queue_t, mutex));
+ _mutex_lock(q + offsetof(gc_jd_queue_t, m.mutex));
// copy in the queue structure
- mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
+ mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
mfc_read_tag_status_all(); // wait for DMA to complete
if (local_q.head == 0){ // empty
- _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex));
+ _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex));
return false;
}
@@ -77,7 +96,7 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
// copy the queue structure back out
- mfc_put(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0);
+ mfc_put(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
mfc_read_tag_status_all(); // wait for DMA to complete
@@ -89,7 +108,7 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
// a normal DMA, and that a putlluc is better than a putllc if
// you can use it.
- _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex));
+ _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex));
return true;
}
@@ -97,12 +116,12 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
void
gc_jd_queue_getllar(gc_eaddr_t q)
{
- // get reservation that includes the tail of the queue
- gc_eaddr_t tail = q + offsetof(gc_jd_queue_t, tail);
+ // get reservation that includes the flag in the queue
+ gc_eaddr_t ea = q + offsetof(gc_jd_queue_t, f.flag);
char _tmp[256];
char *buf = (char *) ALIGN(_tmp, 128); // get cache-aligned buffer
- mfc_getllar(buf, ALIGN128_EA(tail), 0, 0);
+ mfc_getllar(buf, ALIGN128_EA(ea), 0, 0);
spu_readch(MFC_RdAtomicStat);
}
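
Not part of the commit: a minimal sketch of how an SPE worker loop is expected to pair with the host's flag write. gc_jd_queue_dequeue() and gc_jd_queue_getllar() are the functions above; spe_wait_for_work(), the loop structure, and the header name are illustrative assumptions.

#include <spu_mfcio.h>
#include "gc_jd_queue.h"          /* assumed gcell SPU header for the decls */

static void spe_wait_for_work(gc_eaddr_t q, gc_eaddr_t *item_ea,
                              int jd_tag, gc_job_desc_t *item)
{
  spu_write_event_mask(MFC_LLR_LOST_EVENT);       // watch for lost reservation

  while (!gc_jd_queue_dequeue(q, item_ea, jd_tag, item)){
    gc_jd_queue_getllar(q);                       // reserve the line holding f.flag

    /* Re-check after taking the reservation.  If the queue still looks
     * empty, the dequeue above returned at the peek stage without touching
     * the mutex, so the reservation is intact and we can sleep until the
     * host's "q->f.flag = 1" store kills it. */
    if (gc_jd_queue_dequeue(q, item_ea, jd_tag, item))
      break;

    spu_read_event_status();                      // blocks until the flag line changes
    spu_write_event_ack(MFC_LLR_LOST_EVENT);
  }
}
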