diff options
Diffstat (limited to 'gcell')
-rw-r--r-- | gcell/src/include/gc_jd_queue_data.h | 27 | ||||
-rw-r--r-- | gcell/src/include/spu/Makefile.am | 3 | ||||
-rw-r--r-- | gcell/src/include/spu/gc_jd_queue.h | 12 | ||||
-rw-r--r-- | gcell/src/include/spu/gc_random.h | 32 | ||||
-rw-r--r-- | gcell/src/lib/runtime/gc_jd_queue.c | 36 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_main.c | 36 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_random.c | 40 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_spu_jd_queue.c | 125 | ||||
-rw-r--r-- | gcell/src/lib/spu/Makefile.am | 4 |
9 files changed, 188 insertions, 127 deletions
diff --git a/gcell/src/include/gc_jd_queue_data.h b/gcell/src/include/gc_jd_queue_data.h index e5fa87499..3fd7270d5 100644 --- a/gcell/src/include/gc_jd_queue_data.h +++ b/gcell/src/include/gc_jd_queue_data.h @@ -35,31 +35,14 @@ __GC_BEGIN_DECLS * work. SPE's dequeue from here. * * FIXME make it lock free ;) For now, use a spin lock. + * + * (Fills a single cache line) */ - -typedef struct gc_jd_q_links -{ - gc_eaddr_t head _AL16; - gc_eaddr_t tail _AL16; -} gc_jd_q_links_t; - -typedef struct gc_jd_q_mutex -{ - uint32_t mutex; // libsync mutex (spin lock) - uint32_t _pad[31]; // pad to cache line so we can use putlluc on SPE -} _AL128 gc_jd_q_mutex_t; - -typedef struct gc_jd_q_flag -{ - uint32_t flag; // host writes this after enqueuing - uint32_t _pad[31]; // pad to cache line -} _AL128 gc_jd_q_flag_t; - typedef struct gc_jd_queue { - gc_jd_q_links_t l; - gc_jd_q_mutex_t m; - gc_jd_q_flag_t f; + gc_eaddr_t head _AL16; + gc_eaddr_t tail _AL16; + uint32_t mutex _AL16; // libsync mutex (spin lock) } _AL128 gc_jd_queue_t; __GC_END_DECLS diff --git a/gcell/src/include/spu/Makefile.am b/gcell/src/include/spu/Makefile.am index d202336f7..81a8bfdf4 100644 --- a/gcell/src/include/spu/Makefile.am +++ b/gcell/src/include/spu/Makefile.am @@ -22,4 +22,5 @@ include $(top_srcdir)/Makefile.common gcellspuinclude_HEADERS = \ gc_delay.h \ - gc_jd_queue.h + gc_jd_queue.h \ + gc_random.h diff --git a/gcell/src/include/spu/gc_jd_queue.h b/gcell/src/include/spu/gc_jd_queue.h index 7a6ac2e21..b65b15feb 100644 --- a/gcell/src/include/spu/gc_jd_queue.h +++ b/gcell/src/include/spu/gc_jd_queue.h @@ -39,20 +39,14 @@ __GC_BEGIN_DECLS * \param[out] item is local store copy of item at head of queue. * \returns false if the queue is empty, otherwise returns true * and sets \p item_ea and DMA's job descriptor into \p item + * + * If return is false, we're holding a lock-line reservation that + * covers the queue. */ bool gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, int jd_tag, gc_job_desc_t *item); - -/*! - * \brief Get a line reservation on the queue - * - * \param[in] q is EA address of queue structure. - */ -void -gc_jd_queue_getllar(gc_eaddr_t q); - __GC_END_DECLS diff --git a/gcell/src/include/spu/gc_random.h b/gcell/src/include/spu/gc_random.h new file mode 100644 index 000000000..ccb564731 --- /dev/null +++ b/gcell/src/include/spu/gc_random.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GC_RANDOM_H +#define INCLUDED_GC_RANDOM_H + +/*! + * \brief Return a uniformly distributed value in the range [0, 1.0) + * (Linear congruential generator. YMMV. Caveat emptor.) + */ + +float gc_uniform_deviate(void); +void gc_set_seed(int seed); + +#endif /* INCLUDED_GC_RANDOM_H */ diff --git a/gcell/src/lib/runtime/gc_jd_queue.c b/gcell/src/lib/runtime/gc_jd_queue.c index 29b74c29d..b5cdcac9b 100644 --- a/gcell/src/lib/runtime/gc_jd_queue.c +++ b/gcell/src/lib/runtime/gc_jd_queue.c @@ -28,10 +28,9 @@ void gc_jd_queue_init(gc_jd_queue_t *q) { - _mutex_init(ptr_to_ea(&q->m.mutex)); - q->l.head = 0; - q->l.tail = 0; - q->f.flag = 0; + _mutex_init(ptr_to_ea(&q->mutex)); + q->head = 0; + q->tail = 0; smp_wmb(); } @@ -39,44 +38,41 @@ void gc_jd_queue_enqueue(gc_jd_queue_t *q, gc_job_desc_t *item) { item->sys.next = 0; - _mutex_lock(ptr_to_ea(&q->m.mutex)); + _mutex_lock(ptr_to_ea(&q->mutex)); smp_rmb(); // import barrier - if (q->l.tail == 0){ // currently empty - q->l.tail = q->l.head = jdp_to_ea(item); + if (q->tail == 0){ // currently empty + q->tail = q->head = jdp_to_ea(item); } else { // not empty, append - ea_to_jdp(q->l.tail)->sys.next = jdp_to_ea(item); - q->l.tail = jdp_to_ea(item); + ea_to_jdp(q->tail)->sys.next = jdp_to_ea(item); + q->tail = jdp_to_ea(item); } smp_wmb(); // orders stores above before clearing of mutex - _mutex_unlock(ptr_to_ea(&q->m.mutex)); - - // let SPE's know we wrote something if they've got a lock-line reservation - q->f.flag = 1; + _mutex_unlock(ptr_to_ea(&q->mutex)); } gc_job_desc_t * gc_jd_queue_dequeue(gc_jd_queue_t *q) { - _mutex_lock(ptr_to_ea(&q->m.mutex)); + _mutex_lock(ptr_to_ea(&q->mutex)); smp_rmb(); // import barrier - gc_eaddr_t item_ea = q->l.head; + gc_eaddr_t item_ea = q->head; if (item_ea == 0){ // empty - _mutex_unlock(ptr_to_ea(&q->m.mutex)); + _mutex_unlock(ptr_to_ea(&q->mutex)); return 0; } - q->l.head = ea_to_jdp(item_ea)->sys.next; - if (q->l.head == 0) // now emtpy - q->l.tail = 0; + q->head = ea_to_jdp(item_ea)->sys.next; + if (q->head == 0) // now emtpy + q->tail = 0; gc_job_desc_t *item = ea_to_jdp(item_ea); item->sys.next = 0; smp_wmb(); // orders stores above before clearing of mutex - _mutex_unlock(ptr_to_ea(&q->m.mutex)); + _mutex_unlock(ptr_to_ea(&q->mutex)); return item; } diff --git a/gcell/src/lib/runtime/spu/gc_main.c b/gcell/src/lib/runtime/spu/gc_main.c index 867a21de8..1e5b03de2 100644 --- a/gcell/src/lib/runtime/spu/gc_main.c +++ b/gcell/src/lib/runtime/spu/gc_main.c @@ -31,6 +31,7 @@ #include "gc_jd_queue.h" #include "gc_delay.h" #include "gc_declare_proc.h" +#include "gc_random.h" #include "spu_buffers.h" #include <string.h> #include <assert.h> @@ -195,6 +196,8 @@ backoff_reset(void) backoff = _backoff_start; } +#if 0 + static void backoff_delay(void) { @@ -204,6 +207,25 @@ backoff_delay(void) backoff = ((backoff << 1) + 1) & _backoff_cap; } +#else + +#define RANDOM_WEIGHT 0.2 + +static void +backoff_delay(void) +{ + gc_cdelay(backoff); + + backoff = ((backoff << 1) + 1); + if (backoff > _backoff_cap) + backoff = _backoff_cap; + + float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5))); + backoff = backoff * (1.0 + r); +} + +#endif + // ------------------------------------------------------------------------ static inline unsigned int @@ -565,9 +587,15 @@ main_loop(void) gc_eaddr_t jd_ea; int total_jobs = 0; +#if (USE_LLR_LOST_EVENT) // setup events spu_writech(SPU_WrEventMask, MFC_LLR_LOST_EVENT); - gc_jd_queue_getllar(spu_args.queue); // get a line reservation on the queue + + // prime the pump + while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)) + process_job(jd_ea, &jd); + // we're now holding a lock-line reservation +#endif while (1){ @@ -590,10 +618,8 @@ main_loop(void) // by somebody doing something to the queue. Go look and see // if there's anything for us. // - if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)) + while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)) process_job(jd_ea, &jd); - - gc_jd_queue_getllar(spu_args.queue); // get a new reservation } // @@ -669,6 +695,8 @@ main(unsigned long long spe_id __attribute__((unused)), // initialize pointer to procedure entry table gc_proc_def = (gc_proc_def_t *) spu_args.proc_def_ls_addr; + gc_set_seed(spu_args.spu_idx); + // initialize logging _gc_log_init(spu_args.log); diff --git a/gcell/src/lib/runtime/spu/gc_random.c b/gcell/src/lib/runtime/spu/gc_random.c new file mode 100644 index 000000000..618cc7eba --- /dev/null +++ b/gcell/src/lib/runtime/spu/gc_random.c @@ -0,0 +1,40 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include <gc_random.h> + +static int last_val = 0; + +#define M 714025 // values from Numerical Recipes in C, 1988 +#define A 4096 +#define C 150889 + +void +gc_set_seed(int seed) +{ + last_val = ((unsigned int) seed) % M; +} + +float +gc_uniform_deviate(void) +{ + last_val = (last_val * A + C) % M; + return (float) last_val / (float) M; +} diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c index 0dd165fc0..6fa2d6af0 100644 --- a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c +++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2007 Free Software Foundation, Inc. + * Copyright 2007,2008 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -22,106 +22,91 @@ #include "gc_jd_queue.h" #include "mutex_lock.h" #include "mutex_unlock.h" +#include "gc_delay.h" +#include "gc_random.h" + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) extern int gc_sys_tag; -/* - * ea must be 128-byte aligned, the mutex is in the first int32_t, and - * it must be safe to write the remaining 124 bytes with anything at - * all. - */ -static __inline void _fast_mutex_unlock(mutex_ea_t ea) +#define INITIAL_BACKOFF 32.0 +#define MAX_BACKOFF 16384.0 +#define RANDOM_WEIGHT 0.2 + +static float +next_backoff(float backoff) { - char _tmp[256]; - vector signed int *buf - = (vector signed int *) ALIGN(_tmp, 128); // get cache-aligned buffer + // exponential with random + float t = backoff * 2.0; + if (t > MAX_BACKOFF) + t = MAX_BACKOFF; - buf[0] = spu_splats(0); // the value that unlocks the mutex + float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5))); + t = t * (1.0 + r); - mfc_putlluc(buf, ea, 0, 0); // unconditional put, no reservation reqd - spu_readch(MFC_RdAtomicStat); + return t; } - - bool gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, int jd_tag, gc_job_desc_t *item) { - gc_jd_q_links_t local_q; + int status; + char _tmp[256]; + gc_jd_queue_t *local_q = + (gc_jd_queue_t *) ALIGN(_tmp, 128); // get cache-aligned buffer + + float backoff = next_backoff(INITIAL_BACKOFF); - // Before aquiring the lock, see if it's possible that there's - // something in the queue. Checking in this way makes it easier - // for the PPE to insert things, since we're not contending for - // the lock unless there is something in the queue. + do { + // Copy the queue structure in and get a lock line reservation. + // (The structure is 128-byte aligned and completely fills a cache-line) - // copy in the queue structure - mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0); - mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in - mfc_read_tag_status_all(); // wait for DMA to complete + mfc_getllar(local_q, q, 0, 0); + spu_readch(MFC_RdAtomicStat); - if (local_q.head == 0){ // empty - return false; - } + if (local_q->mutex != 0) // somebody else has it locked + return false; - // When we peeked, head was non-zero. Now grab the - // lock and do it for real. + if (local_q->head == 0) // the queue is empty + return false; - _mutex_lock(q + offsetof(gc_jd_queue_t, m.mutex)); + // Try to acquire the lock - // copy in the queue structure - mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0); - mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in - mfc_read_tag_status_all(); // wait for DMA to complete + local_q->mutex = 1; + mfc_putllc(local_q, q, 0, 0); + status = spu_readch(MFC_RdAtomicStat); + + if (status != 0){ + gc_cdelay((int) backoff); + backoff = next_backoff(backoff); + } - if (local_q.head == 0){ // empty - _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex)); - return false; - } + } while (status != 0); + // we're now holding the lock + // copy in job descriptor at head of queue - *item_ea = local_q.head; + *item_ea = local_q->head; // We must use the fence with the jd_tag to ensure that any // previously initiated put of a job desc is locally ordered before // the get of the new one. - mfc_getf(item, local_q.head, sizeof(gc_job_desc_t), jd_tag, 0, 0); + mfc_getf(item, local_q->head, sizeof(gc_job_desc_t), jd_tag, 0, 0); mfc_write_tag_mask(1 << jd_tag); // the tag we're interested in mfc_read_tag_status_all(); // wait for DMA to complete - local_q.head = item->sys.next; + local_q->head = item->sys.next; item->sys.next = 0; - if (local_q.head == 0) // now empty? - local_q.tail = 0; - + if (local_q->head == 0) // now empty? + local_q->tail = 0; - // copy the queue structure back out - mfc_put(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0); - mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in - mfc_read_tag_status_all(); // wait for DMA to complete + // Copy the queue struct back out and unlock the mutex in one fell swoop. + // We use the unconditional put since it's faster and we own the lock. - // Q: FIXME do we need to order stores in EA or can we just clear the - // local copy of the mutex above and blast it out, removing the need - // for this explicit unlock? - // - // A: Manual says it's better to use an atomic op rather than - // a normal DMA, and that a putlluc is better than a putllc if - // you can use it. + local_q->mutex = 0; + mfc_putlluc(local_q, q, 0, 0); + spu_readch(MFC_RdAtomicStat); - _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex)); return true; } - - -void -gc_jd_queue_getllar(gc_eaddr_t q) -{ - // get reservation that includes the flag in the queue - gc_eaddr_t ea = q + offsetof(gc_jd_queue_t, f.flag); - - char _tmp[256]; - char *buf = (char *) ALIGN(_tmp, 128); // get cache-aligned buffer - - mfc_getllar(buf, ALIGN128_EA(ea), 0, 0); - spu_readch(MFC_RdAtomicStat); -} diff --git a/gcell/src/lib/spu/Makefile.am b/gcell/src/lib/spu/Makefile.am index 61ef7c8f4..30385f9b4 100644 --- a/gcell/src/lib/spu/Makefile.am +++ b/gcell/src/lib/spu/Makefile.am @@ -44,7 +44,9 @@ runtime_spu_sources = \ $(runtime_srcdir)/gc_spu_jd_queue.c \ $(runtime_srcdir)/spu_buffers.c \ $(runtime_srcdir)/gc_logging.c \ - $(runtime_srcdir)/gc_main.c + $(runtime_srcdir)/gc_main.c \ + $(runtime_srcdir)/gc_random.c + runtime_spu_headers = |