summaryrefslogtreecommitdiff
path: root/gcell
diff options
context:
space:
mode:
Diffstat (limited to 'gcell')
-rw-r--r--gcell/apps/Makefile.am8
-rw-r--r--gcell/apps/benchmark_roundtrip.cc240
-rwxr-xr-xgcell/apps/plot_speedup.py1
-rw-r--r--gcell/include/gcell/gc_mbox.h7
-rw-r--r--gcell/include/gcell/spu/gc_jd_queue.h12
-rw-r--r--gcell/lib/runtime/gc_job_manager_impl.cc190
-rw-r--r--gcell/lib/runtime/gc_job_manager_impl.h36
-rw-r--r--gcell/lib/runtime/spu/gc_main.c81
-rw-r--r--gcell/lib/runtime/spu/gc_random.c14
-rw-r--r--gcell/lib/runtime/spu/gc_spu_config.h16
-rw-r--r--gcell/lib/runtime/spu/gc_spu_jd_queue.c27
-rw-r--r--gcell/lib/wrapper/Makefile.am13
-rw-r--r--gcell/lib/wrapper/qa_gcell_wrapper.cc6
13 files changed, 563 insertions, 88 deletions
diff --git a/gcell/apps/Makefile.am b/gcell/apps/Makefile.am
index e6768523c..7cf9122a3 100644
--- a/gcell/apps/Makefile.am
+++ b/gcell/apps/Makefile.am
@@ -1,5 +1,5 @@
#
-# Copyright 2007,2008 Free Software Foundation, Inc.
+# Copyright 2007,2008,2009 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
@@ -34,7 +34,8 @@ TESTS = test_all
bin_PROGRAMS = \
test_all \
benchmark_dma \
- benchmark_nop
+ benchmark_nop \
+ benchmark_roundtrip
test_all_SOURCES = test_all.cc
@@ -45,3 +46,6 @@ benchmark_dma_LDADD = spu/benchmark_procs $(GCELL_LA)
benchmark_nop_SOURCES = benchmark_nop.cc
benchmark_nop_LDADD = spu/benchmark_procs $(GCELL_LA)
+
+benchmark_roundtrip_SOURCES = benchmark_roundtrip.cc
+benchmark_roundtrip_LDADD = spu/benchmark_procs $(GCELL_LA)
diff --git a/gcell/apps/benchmark_roundtrip.cc b/gcell/apps/benchmark_roundtrip.cc
new file mode 100644
index 000000000..85af03a08
--- /dev/null
+++ b/gcell/apps/benchmark_roundtrip.cc
@@ -0,0 +1,240 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#if defined(HAVE_CONFIG_H)
+#include <config.h>
+#endif
+#include <gcell/gc_job_manager.h>
+#include <omni_time.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <boost/scoped_array.hpp>
+#include <assert.h>
+
+// handle to embedded SPU executable that contains benchmark routines
+// (The name of the variable (benchmark_procs) is the name of the spu executable.)
+extern spe_program_handle_t benchmark_procs;
+
+static gc_proc_id_t gcp_benchmark_udelay = GCP_UNKNOWN_PROC;
+
+#define BENCHMARK_PUT 0x1
+#define BENCHMARK_GET 0x2
+#define BENCHMARK_GET_PUT (BENCHMARK_PUT|BENCHMARK_GET)
+
+
+#if 0
+/*
+ * Return true iff exactly one bit of x is set, i.e. x is a power of 2.
+ * Zero is not considered a power of 2.
+ */
+static bool
+power_of_2_p(unsigned long x)
+{
+  // clearing the lowest set bit leaves 0 only when a single bit was set
+  return x != 0 && (x & (x - 1)) == 0;
+}
+#endif
+
+/*
+ * Fill in job descriptor jd so that it invokes the benchmark_udelay proc.
+ *
+ * usecs is passed to the proc as its single direct input argument
+ * (presumably the number of microseconds to delay; the proc itself is
+ * not visible here).  getput_mask selects which EA (DMA) arguments are
+ * attached:
+ *
+ *   BENCHMARK_GET      one GET of buflen bytes from getbuf
+ *   BENCHMARK_PUT      one PUT of buflen bytes to putbuf
+ *   BENCHMARK_GET_PUT  both, GET first
+ *   neither bit set    no EA arguments
+ */
+static void
+init_jd(gc_job_desc *jd, unsigned int usecs,
+        unsigned char *getbuf, unsigned char *putbuf, size_t buflen,
+        int getput_mask)
+{
+  jd->proc_id = gcp_benchmark_udelay;
+  jd->input.nargs = 1;
+  jd->input.arg[0].u32 = usecs;
+  jd->output.nargs = 0;
+
+  switch(getput_mask & BENCHMARK_GET_PUT){
+
+  case BENCHMARK_GET:
+    jd->eaa.nargs = 1;
+    jd->eaa.arg[0].direction = GCJD_DMA_GET;
+    jd->eaa.arg[0].ea_addr = ptr_to_ea(getbuf);
+    jd->eaa.arg[0].get_size = buflen;
+    break;
+
+  case BENCHMARK_PUT:
+    jd->eaa.nargs = 1;
+    jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+    jd->eaa.arg[0].ea_addr = ptr_to_ea(putbuf);
+    jd->eaa.arg[0].put_size = buflen;
+    break;
+
+  case BENCHMARK_GET_PUT:
+    jd->eaa.nargs = 2;
+    jd->eaa.arg[0].direction = GCJD_DMA_GET;
+    jd->eaa.arg[0].ea_addr = ptr_to_ea(getbuf);
+    jd->eaa.arg[0].get_size = buflen;
+    jd->eaa.arg[1].direction = GCJD_DMA_PUT;
+    jd->eaa.arg[1].ea_addr = ptr_to_ea(putbuf);
+    jd->eaa.arg[1].put_size = buflen;
+    break;
+
+  default:
+    // Neither GET nor PUT requested.  Don't leave eaa.nargs holding
+    // whatever the descriptor contained before.
+    jd->eaa.nargs = 0;
+    break;
+  }
+}
+
+/*
+ * Run the round-trip benchmark and print one line of results to stdout.
+ *
+ * Creates a job manager asking for nspes SPEs, builds njobs_at_once
+ * benchmark_udelay job descriptors (see init_jd: usecs is the proc's
+ * argument, dma_size bytes are DMA'd in the direction(s) selected by
+ * getput_mask), then 100000 times submits all of them and waits for all
+ * of them to complete.
+ *
+ * If njobs_at_once is <= 0, one job per SPE reported by mgr->nspes() is
+ * used.  dma_size must fit in the (32KiB * njobs) benchmark buffers.
+ * Errors are reported on stderr and the test is abandoned.
+ */
+static void
+run_test(unsigned int nspes, unsigned int usecs, unsigned int dma_size,
+         int getput_mask, int njobs_at_once)
+{
+  // Declared ahead of the job manager so that they are destroyed after
+  // it: if we bail out with jobs still in flight, the DMA buffers must
+  // stay valid until the manager has been torn down.
+  boost::scoped_array<unsigned char> getbuf;
+  boost::scoped_array<unsigned char> putbuf;
+  boost::scoped_array<gc_job_desc *> all_jds;
+  boost::scoped_array<bool> done;
+
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&benchmark_procs);
+  opts.nspes = nspes;
+  //opts.enable_logging = true;
+  //opts.log2_nlog_entries = 13;
+  gc_job_manager_sptr mgr = gc_make_job_manager(&opts);
+
+  if ((gcp_benchmark_udelay = mgr->lookup_proc("benchmark_udelay")) == GCP_UNKNOWN_PROC){
+    fprintf(stderr, "lookup_proc: failed to find \"benchmark_udelay\"\n");
+    return;
+  }
+
+  // The caller's default is njobs_at_once == nspes, which is 0 unless -n
+  // was given.  Fall back to the number of SPEs the manager reports
+  // rather than creating zero-length arrays and a zero-byte buffer.
+  const int NJDS = njobs_at_once > 0 ? njobs_at_once : (int) mgr->nspes();
+  if (NJDS <= 0){
+    fprintf(stderr, "run_test: number of jobs must be > 0\n");
+    return;
+  }
+
+  const size_t BUFSIZE = (size_t) (32 << 10) * NJDS;
+  if (dma_size > BUFSIZE){
+    // a single transfer would run off the end of the buffers
+    fprintf(stderr, "dma_size (%u) is larger than the %lu byte benchmark buffers\n",
+            dma_size, (unsigned long) BUFSIZE);
+    return;
+  }
+
+  getbuf.reset(new unsigned char[BUFSIZE]);
+  putbuf.reset(new unsigned char[BUFSIZE]);
+  all_jds.reset(new gc_job_desc *[NJDS]);
+  done.reset(new bool[NJDS]);
+
+  // touch all pages to force allocation now
+  for (size_t i = 0; i < BUFSIZE; i += 4096){
+    getbuf[i] = 0;
+    putbuf[i] = 0;
+  }
+
+  // allocate and init all job descriptors
+  size_t gbi = 0;		// offset of the next job's slice of the buffers
+  for (int i = 0; i < NJDS; i++){
+    if (gbi + dma_size > BUFSIZE)
+      gbi = 0;
+
+    all_jds[i] = mgr->alloc_job_desc();
+    if (all_jds[i] == 0){
+      fprintf(stderr, "alloc_job_desc() returned 0\n");
+      for (int j = 0; j < i; j++)	// give back what we did get
+        mgr->free_job_desc(all_jds[j]);
+      return;
+    }
+    init_jd(all_jds[i], usecs, &getbuf[gbi], &putbuf[gbi], dma_size, getput_mask);
+    gbi += dma_size;
+  }
+
+  int niter = 100000;
+  bool all_reaped = true;	// false if any job may still be in flight
+  omni_time t_start = omni_time::time();
+
+  for (int iter = 0; iter < niter; iter++){
+
+    // submit the jobs
+    for (int i = 0; i < NJDS; i++){
+      if (!mgr->submit_job(all_jds[i])){
+        printf("submit_job(jds[%d]) failed, status = %d\n",
+               i, all_jds[i]->status);
+      }
+    }
+
+    int n = mgr->wait_jobs(NJDS, all_jds.get(), done.get(), GC_WAIT_ALL);
+    if (n < 0){
+      fprintf(stderr, "mgr->wait_jobs failed\n");
+      all_reaped = false;
+      break;
+    }
+    if (n != NJDS){
+      fprintf(stderr, "mgr->wait_jobs returned short count.  Expected %d, got %d\n",
+              NJDS, n);
+      all_reaped = false;
+    }
+    else
+      all_reaped = true;
+  }
+
+  // stop timing
+  omni_time t_stop = omni_time::time();
+  double delta = (t_stop - t_start).double_time();
+  printf("nspes: %2d udelay: %4u elapsed_time: %7.3f dma_size: %5u dma_throughput: %7.3e round_trip: %gus\n",
+         mgr->nspes(), usecs, delta, dma_size,
+         (double) NJDS * niter * dma_size / delta * (getput_mask == BENCHMARK_GET_PUT ? 2.0 : 1.0),
+         delta / niter * 1e6);
+
+  // Return the descriptors, but only when we know none is still in flight.
+  if (all_reaped){
+    for (int i = 0; i < NJDS; i++)
+      mgr->free_job_desc(all_jds[i]);
+  }
+}
+
+/*
+ * Print the command line synopsis to stderr.
+ */
+static void
+usage()
+{
+  // this program is benchmark_roundtrip, not benchmark_dma
+  fprintf(stderr, "usage: benchmark_roundtrip [-p] [-g] [-n <nspes>] [-u <udelay>] [-s <dma_size>] [-N <njobs_at_a_time>]\n");
+  fprintf(stderr, " you must specify one or both of -p (put) and -g (get)\n");
+}
+
+
+/*
+ * Parse the argument of option -<opt> as an integer (strtol, base 0, so
+ * decimal, 0x hex and 0 octal are accepted).
+ *
+ * Returns true and stores the value in *value on success.  Returns false
+ * after printing a message to stderr if text is not entirely a number,
+ * does not fit in an int, or is negative while allow_negative is false.
+ */
+static bool
+parse_int_option(int opt, const char *text, bool allow_negative, long *value)
+{
+  char *end = 0;
+  long v = strtol(text, &end, 0);
+
+  if (end == text || *end != '\0'
+      || v >= 0x7fffffffL || v <= -0x7fffffffL
+      || (v < 0 && !allow_negative)){
+    fprintf(stderr, "-%c: invalid value \"%s\"\n", opt, text);
+    return false;
+  }
+
+  *value = v;
+  return true;
+}
+
+/*
+ * Options:
+ *   -n <nspes>    number of SPEs to request (default 0)
+ *   -u <udelay>   argument handed to benchmark_udelay (default 0)
+ *   -s <dma_size> bytes per DMA, must be > 0 (default 32KiB)
+ *   -N <njobs>    jobs submitted at a time (default, or negative: nspes)
+ *   -p / -g       benchmark puts / gets; at least one is required
+ *
+ * Returns 0 after running the test, 1 on a usage error.
+ */
+int
+main(int argc, char **argv)
+{
+  unsigned int nspes = 0;
+  unsigned int usecs = 0;
+  unsigned int dma_size = 32 << 10;
+  int njobs_at_once = -1;
+  int getput_mask = 0;
+  int ch;
+  long val = 0;
+
+  while ((ch = getopt(argc, argv, "n:u:s:pgN:")) != EOF){
+    switch(ch){
+    case 'n':
+      if (!parse_int_option(ch, optarg, false, &val))
+        return 1;
+      nspes = val;
+      break;
+
+    case 'u':
+      if (!parse_int_option(ch, optarg, false, &val))
+        return 1;
+      usecs = val;
+      break;
+
+    case 'N':
+      // a negative count keeps its old meaning: use the default
+      if (!parse_int_option(ch, optarg, true, &val))
+        return 1;
+      njobs_at_once = val;
+      break;
+
+    case 's':
+      if (!parse_int_option(ch, optarg, false, &val))
+        return 1;
+      dma_size = val;
+      if (dma_size == 0){
+        fprintf(stderr, "-s <dma_size> must be > 0\n");
+        return 1;
+      }
+      break;
+
+    case 'p':
+      getput_mask |= BENCHMARK_PUT;
+      break;
+
+    case 'g':
+      getput_mask |= BENCHMARK_GET;
+      break;
+
+    case '?':
+    default:
+      usage();
+      return 1;
+    }
+  }
+
+  if (njobs_at_once < 0)
+    njobs_at_once = nspes;
+
+  if (getput_mask == 0){
+    usage();
+    return 1;
+  }
+
+  run_test(nspes, usecs, dma_size, getput_mask, njobs_at_once);
+  return 0;
+}
diff --git a/gcell/apps/plot_speedup.py b/gcell/apps/plot_speedup.py
index 96277f85c..37822a73f 100755
--- a/gcell/apps/plot_speedup.py
+++ b/gcell/apps/plot_speedup.py
@@ -36,6 +36,7 @@ class plot_data(object):
self.markers = {
+ 5 : 'x',
10 : 'o',
50 : 's',
100 : '^',
diff --git a/gcell/include/gcell/gc_mbox.h b/gcell/include/gcell/gc_mbox.h
index 1d577ff8f..9793401f1 100644
--- a/gcell/include/gcell/gc_mbox.h
+++ b/gcell/include/gcell/gc_mbox.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -41,12 +41,13 @@
#define OP_EXIT 0x0 // exit now
#define OP_GET_SPU_BUFSIZE 0x1
+#define OP_CHECK_QUEUE 0x2
// SPE to PPE (sent via SPE Write Outbound Interrupt Mailbox)
-#define OP_JOBS_DONE 0x2 // arg is 0 or 1, indicating which
+#define OP_JOBS_DONE 0x3 // arg is 0 or 1, indicating which
// gc_completion_info_t contains the info
-#define OP_SPU_BUFSIZE 0x3 // arg is max number of bytes
+#define OP_SPU_BUFSIZE 0x4 // arg is max number of bytes
#endif /* INCLUDED_GCELL_GC_MBOX_H */
diff --git a/gcell/include/gcell/spu/gc_jd_queue.h b/gcell/include/gcell/spu/gc_jd_queue.h
index ce1977c94..c2300478c 100644
--- a/gcell/include/gcell/spu/gc_jd_queue.h
+++ b/gcell/include/gcell/spu/gc_jd_queue.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -30,6 +30,12 @@
__GC_BEGIN_DECLS
+/*!
+ * \brief Result of gc_jd_queue_dequeue.
+ *
+ * Only GCQ_OK means an item was removed from the queue.
+ */
+typedef enum {
+ GCQ_OK, // Got an item
+ GCQ_EMPTY, // Q is empty
+ GCQ_LOCKED, // Somebody else has the queue locked
+} gc_dequeue_status_t;
+
/*!
* \brief Remove and return item at head of queue.
*
@@ -40,10 +46,10 @@ __GC_BEGIN_DECLS
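- * \returns false if the queue is empty, otherwise returns true
- * and sets \p item_ea and DMA's job descriptor into \p item
+ * \returns GCQ_EMPTY or GCQ_LOCKED if no item could be taken, otherwise GCQ_OK;
+ * on GCQ_OK sets \p item_ea and DMA's job descriptor into \p item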
*
- * If return is false, we're holding a lock-line reservation that
+ * If return is not GCQ_OK, we're holding a lock-line reservation that
* covers the queue.
*/
-bool
+gc_dequeue_status_t
gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
int jd_tag, gc_job_desc_t *item);
diff --git a/gcell/lib/runtime/gc_job_manager_impl.cc b/gcell/lib/runtime/gc_job_manager_impl.cc
index 629019f4d..4469d5023 100644
--- a/gcell/lib/runtime/gc_job_manager_impl.cc
+++ b/gcell/lib/runtime/gc_job_manager_impl.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -37,8 +37,27 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
+#include <sched.h>
+#define __nop() __asm__ volatile ("ori 0,0,0" : : : "memory")
+#define __cctpl() __asm__ volatile ("or 1,1,1" : : : "memory")
+#define __cctpm() __asm__ volatile ("or 2,2,2" : : : "memory")
+#define __cctph() __asm__ volatile ("or 3,3,3" : : : "memory")
+#define __db8cyc() __asm__ volatile ("or 28,28,28" : : : "memory")
+#define __db10cyc() __asm__ volatile ("or 29,29,29" : : : "memory")
+#define __db12cyc() __asm__ volatile ("or 30,30,30" : : : "memory")
+#define __db16cyc() __asm__ volatile ("or 31,31,31" : : : "memory")
+
+
+#if 1
+#define CCTPL() __cctpl()
+#define CCTPM() __cctpm()
+#else
+#define CCTPL() (void) 0
+#define CCTPM() (void) 0
+#endif
+
static const size_t CACHE_LINE_SIZE = 128;
static const unsigned int DEFAULT_MAX_JOBS = 128;
@@ -99,6 +118,8 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
: d_debug(0), d_spu_args(0),
d_eh_cond(&d_eh_mutex), d_eh_thread(0), d_eh_state(EHS_INIT),
d_shutdown_requested(false),
+ d_jc_cond(&d_jc_mutex), d_jc_thread(0), d_jc_state(JCS_INIT), d_jc_njobs_active(0),
+ d_ntell(0), d_tell_start(0),
d_client_thread(0), d_ea_args_maxsize(0),
d_proc_def(0), d_proc_def_ls_addr(0), d_nproc_defs(0)
{
@@ -177,6 +198,8 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
}
}
+ d_ntell = std::min(d_options.nspes, 2U);
+
// ----------------------------------------------------------------
// initalize the job queue
@@ -218,6 +241,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// fprintf(stderr, "d_proc_def_ls_addr = 0x%0x\n", d_proc_def_ls_addr);
int spe_flags = (SPE_EVENTS_ENABLE
+ | SPE_MAP_PS
| SPE_CFG_SIGNOTIFY1_OR
| SPE_CFG_SIGNOTIFY2_OR);
@@ -228,6 +252,14 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
perror("spe_context_create");
throw std::runtime_error("spe_context_create");
}
+
+ d_worker[i].spe_ctrl =
+ (spe_spu_control_area_t *)spe_ps_area_get(d_worker[i].spe_ctx, SPE_CONTROL_AREA);
+ if (d_worker[i].spe_ctrl == 0){
+ perror("spe_ps_area_get(SPE_CONTROL_AREA)");
+ throw std::runtime_error("spe_ps_area_get(SPE_CONTROL_AREA)");
+ }
+
d_worker[i].spe_idx = i;
d_worker[i].spu_args = &d_spu_args[i];
d_worker[i].spu_args->queue = ptr_to_ea(d_queue);
@@ -315,7 +347,6 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// create the spe event handler & worker (SPE) threads
create_event_handler();
-
}
////////////////////////////////////////////////////////////////////////
@@ -339,7 +370,11 @@ gc_job_manager_impl::shutdown()
{
omni_mutex_lock l(d_eh_mutex);
- d_shutdown_requested = true; // set flag for event handler thread
+ {
+ omni_mutex_lock l2(d_jc_mutex);
+ d_shutdown_requested = true; // set flag for event handler thread
+ d_jc_cond.signal(); // wake up job completer
+ }
// should only happens during early QA code
if (d_eh_thread == 0 && d_eh_state == EHS_INIT)
@@ -420,6 +455,29 @@ gc_job_manager_impl::free_job_desc(gc_job_desc *jd)
////////////////////////////////////////////////////////////////////////
+
+/*
+ * Count one more job as active (submitted but not yet reaped).
+ *
+ * Returns false, without counting the job, if shutdown has been
+ * requested.  Otherwise returns true; when the count goes from 0 to 1
+ * the job completer's condition variable is signalled.
+ * Takes d_jc_mutex.
+ */
+inline bool
+gc_job_manager_impl::incr_njobs_active()
+{
+  omni_mutex_lock guard(d_jc_mutex);
+
+  if (d_shutdown_requested)
+    return false;
+
+  const bool was_idle = (d_jc_njobs_active == 0);
+  d_jc_njobs_active++;
+  if (was_idle)			// 0 to 1 transition
+    d_jc_cond.signal();
+
+  return true;
+}
+
+/*
+ * Subtract n from the count of active jobs.  Takes d_jc_mutex.
+ */
+inline void
+gc_job_manager_impl::decr_njobs_active(int n)
+{
+  omni_mutex_lock guard(d_jc_mutex);
+  d_jc_njobs_active = d_jc_njobs_active - n;
+}
+
+
/*
* We check as much as we can here on the PPE side, so that the SPE
* doesn't have to.
@@ -475,11 +533,6 @@ check_ea_args(gc_job_desc *jd, gc_job_ea_args *p)
bool
gc_job_manager_impl::submit_job(gc_job_desc *jd)
{
- if (unlikely(d_shutdown_requested)){
- jd->status = JS_SHUTTING_DOWN;
- return false;
- }
-
// Ensure it's one of our job descriptors
if (jd < d_jd || jd >= &d_jd[d_options.max_jobs]){
@@ -522,9 +575,13 @@ gc_job_manager_impl::submit_job(gc_job_desc *jd)
jd->status = JS_OK;
jd->sys.client_id = cti->d_client_id;
- // FIXME keep count of jobs in progress?
+ if (!incr_njobs_active()){
+ jd->status = JS_SHUTTING_DOWN;
+ return false;
+ }
gc_jd_queue_enqueue(d_queue, jd);
+ // tell_spes_to_check_queue();
return true;
}
@@ -628,6 +685,27 @@ gc_job_manager_impl::send_spe(unsigned int spe, uint32_t msg)
return r == 1;
}
+//
+// Send OP_CHECK_QUEUE to up to d_ntell SPEs by writing directly to their
+// memory-mapped inbound mailbox register.
+//
+// SPEs are visited round robin starting at d_tell_start; at most one
+// pass over all d_options.nspes workers is made.  d_tell_start is left
+// pointing at the worker after the last one visited.
+//
+// NOTE(review): the only call visible in submit_job is commented out.
+//
+void
+gc_job_manager_impl::tell_spes_to_check_queue()
+{
+ int nspes = d_options.nspes;
+
+ for (int i = 0, ntold = 0; ntold < d_ntell && i < nspes ; ++i){
+ volatile spe_spu_control_area_t *spe_ctrl = d_worker[d_tell_start].spe_ctrl;
+ // bits 8..15 of the status word are read as the number of free
+ // inbound mailbox entries
+ int nfree = (spe_ctrl->SPU_Mbox_Stat >> 8) & 0xFF;
+ // only tell an SPE whose count is 4 -- presumably the full depth of
+ // the inbound mailbox, i.e. nothing is pending; TODO confirm
+ if (nfree == 4){
+ spe_ctrl->SPU_In_Mbox = MK_MBOX_MSG(OP_CHECK_QUEUE, 0);
+ ntold++;
+ }
+
+ // advance the round robin start, wrapping at nspes
+ unsigned int t = d_tell_start + 1;
+ if (t >= d_options.nspes)
+ t = 0;
+ d_tell_start = t;
+ }
+}
+
+
////////////////////////////////////////////////////////////////////////
static void
@@ -685,6 +763,14 @@ start_event_handler(void *arg)
return 0;
}
+//
+// Thread start routine for the job completer thread.
+// arg is the gc_job_manager_impl whose job_completer_loop is run.
+//
+static void *
+start_job_completer(void *arg)
+{
+  gc_job_manager_impl *jm = (gc_job_manager_impl *) arg;
+
+  jm->job_completer_loop();
+  return 0;
+}
+
void
gc_job_manager_impl::create_event_handler()
{
@@ -709,12 +795,18 @@ gc_job_manager_impl::create_event_handler()
}
}
- // create our event handling thread
+ // create the event handling thread
if (!start_thread(&d_eh_thread, start_event_handler, this, "event_handler")){
throw std::runtime_error("pthread_create");
}
+ // create the job completion thread
+
+ if (!start_thread(&d_jc_thread, start_job_completer, this, "job_completer")){
+ throw std::runtime_error("pthread_create");
+ }
+
// create the SPE worker threads
bool ok = true;
@@ -805,6 +897,8 @@ gc_job_manager_impl::notify_clients_jobs_are_done(unsigned int spe_num,
return;
}
+ decr_njobs_active(ci->ncomplete);
+
if (0){
static int total_jobs;
static int total_msgs;
@@ -902,12 +996,13 @@ gc_job_manager_impl::handle_event(spe_event_unit_t *evt)
else {
for (int i = 0; i < n; i++){
switch(MBOX_MSG_OP(msg[i])){
+#if 0
case OP_JOBS_DONE:
if (debug())
printf("eh: job_done (0x%08x) from spu[%d]\n", msg[i], spe_num);
notify_clients_jobs_are_done(spe_num, MBOX_MSG_ARG(msg[i]));
break;
-
+#endif
case OP_SPU_BUFSIZE:
set_ea_args_maxsize(MBOX_MSG_ARG(msg[i]));
break;
@@ -1001,18 +1096,17 @@ gc_job_manager_impl::event_handler_loop()
while (1){
switch(d_eh_state){
- case EHS_RUNNING: // normal stuff
+ case EHS_RUNNING: // normal stuff
if (d_shutdown_requested) {
set_eh_state(EHS_SHUTTING_DOWN);
}
break;
case EHS_SHUTTING_DOWN:
-
- // FIXME wait until job queue is empty, then tell them to exit
-
- send_all_spes(MK_MBOX_MSG(OP_EXIT, 0));
- set_eh_state(EHS_WAITING_FOR_WORKERS_TO_DIE);
+ if (d_jc_state == JCS_DEAD){
+ send_all_spes(MK_MBOX_MSG(OP_EXIT, 0));
+ set_eh_state(EHS_WAITING_FOR_WORKERS_TO_DIE);
+ }
break;
case EHS_WAITING_FOR_WORKERS_TO_DIE:
@@ -1050,7 +1144,67 @@ gc_job_manager_impl::event_handler_loop()
}
////////////////////////////////////////////////////////////////////////
-// This is the top of the SPE worker threads
+
+//
+// Poll the outbound mailbox of every SPE for job completion messages.
+//
+// Makes niter (10000) passes over all d_options.nspes workers, reading
+// the memory-mapped mailbox registers directly.  Each OP_JOBS_DONE
+// message is handed to notify_clients_jobs_are_done; any other message
+// is reported on stdout and dropped.
+//
+// The polling runs at low hardware thread priority (CCTPL) and is
+// raised to medium (CCTPM) while notifying clients and on return.
+//
+void
+gc_job_manager_impl::poll_for_job_completion()
+{
+ static const int niter = 10000;
+
+ CCTPL(); // change current (h/w) thread priority to low
+
+ for (int n = 0; n < niter; n++){
+
+ for (unsigned int spe_num = 0; spe_num < d_options.nspes; spe_num++){
+ volatile spe_spu_control_area_t *spe_ctrl = d_worker[spe_num].spe_ctrl;
+ // low byte of the status word is read as the number of messages
+ // waiting in the outbound mailbox
+ int nentries = spe_ctrl->SPU_Mbox_Stat & 0xFF;
+ while (nentries-- > 0){
+ // one read of SPU_Out_Mbox per counted entry
+ unsigned int msg = spe_ctrl->SPU_Out_Mbox;
+ switch(MBOX_MSG_OP(msg)){
+ case OP_JOBS_DONE:
+ if (debug())
+ printf("jc: job_done (0x%08x) from spu[%d]\n", msg, spe_num);
+
+ CCTPM(); // change current thread priority to medium
+ notify_clients_jobs_are_done(spe_num, MBOX_MSG_ARG(msg));
+ CCTPL();
+ break;
+
+ default:
+ printf("jc: Unexpected msg (0x%08x) from spu[%d]\n", msg, spe_num);
+ break;
+ }
+ }
+ }
+ }
+ CCTPM();
+}
+
+//
+// Body of the job completer thread.
+//
+// Marks the thread JCS_RUNNING, then repeatedly polls the SPEs for
+// completed jobs.  Whenever no jobs are active it either exits (marking
+// the thread JCS_DEAD) if shutdown has been requested, or blocks on
+// d_jc_cond until signalled.
+//
+void
+gc_job_manager_impl::job_completer_loop()
+{
+  d_jc_state = JCS_RUNNING;
+
+  for (;;){
+    bool finished = false;
+
+    {
+      omni_mutex_lock guard(d_jc_mutex);
+      if (d_jc_njobs_active == 0){
+        if (d_shutdown_requested){
+          d_jc_state = JCS_DEAD;	// set while still holding the mutex
+          finished = true;
+        }
+        else
+          d_jc_cond.wait();
+      }
+    }
+
+    if (finished)
+      return;
+
+    poll_for_job_completion();
+  }
+}
+
+////////////////////////////////////////////////////////////////////////
+// this is the top of the SPE worker threads
static void *
start_worker(void *arg)
diff --git a/gcell/lib/runtime/gc_job_manager_impl.h b/gcell/lib/runtime/gc_job_manager_impl.h
index dad7873ab..07dcabaf2 100644
--- a/gcell/lib/runtime/gc_job_manager_impl.h
+++ b/gcell/lib/runtime/gc_job_manager_impl.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -44,14 +44,15 @@ enum worker_state {
};
struct worker_ctx {
- volatile worker_state state;
- unsigned int spe_idx; // [0, nspes-1]
- spe_context_ptr_t spe_ctx;
- pthread_t thread;
- gc_spu_args_t *spu_args; // pointer to 16-byte aligned struct
+ volatile worker_state state;
+ unsigned int spe_idx; // [0, nspes-1]
+ spe_context_ptr_t spe_ctx;
+ spe_spu_control_area_t *spe_ctrl;
+ pthread_t thread;
+ gc_spu_args_t *spu_args; // pointer to 16-byte aligned struct
worker_ctx()
- : state(WS_FREE), spe_idx(0), spe_ctx(0),
+ : state(WS_FREE), spe_idx(0), spe_ctx(0), spe_ctrl(0),
thread(0), spu_args(0) {}
~worker_ctx();
};
@@ -64,6 +65,12 @@ enum evt_handler_state {
EHS_DEAD, // thread is dead
};
+// Life cycle of the job completer thread (see d_jc_state).
+enum job_completer_state {
+ JCS_INIT, // being initialized
+ JCS_RUNNING, // thread is running
+ JCS_DEAD, // thread is dead
+};
+
struct spe_event_handler {
spe_event_handler_ptr_t ptr;
@@ -107,6 +114,16 @@ class gc_job_manager_impl : public gc_job_manager
volatile bool d_shutdown_requested;
spe_event_handler d_spe_event_handler;
+ // used to coordinate communication w/ the job completer thread
+ omni_mutex d_jc_mutex;
+ omni_condition d_jc_cond;
+ pthread_t d_jc_thread; // the job completion thread
+ volatile job_completer_state d_jc_state;
+ int d_jc_njobs_active; // # of jobs submitted but not yet reaped
+
+ // round robin notification of spes
+ int d_ntell; // # of spes to tell
+ unsigned int d_tell_start; // which one to start with
// All of the job descriptors are hung off of here.
// We allocate them all in a single cache aligned chunk.
@@ -150,12 +167,17 @@ class gc_job_manager_impl : public gc_job_manager
public:
void event_handler_loop(); // really private
+ void job_completer_loop(); // really private
private:
bool send_all_spes(uint32_t msg);
bool send_spe(unsigned int spe, uint32_t msg);
void print_event(spe_event_unit_t *evt);
void handle_event(spe_event_unit_t *evt);
+ bool incr_njobs_active();
+ void decr_njobs_active(int n);
+ void tell_spes_to_check_queue();
+ void poll_for_job_completion();
// bitvector ops
void bv_zero(unsigned long *bv);
diff --git a/gcell/lib/runtime/spu/gc_main.c b/gcell/lib/runtime/spu/gc_main.c
index 0866c3c3d..ba56e40f6 100644
--- a/gcell/lib/runtime/spu/gc_main.c
+++ b/gcell/lib/runtime/spu/gc_main.c
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -49,6 +49,10 @@
#define ROUND_UP(x, p2) (((x)+((p2)-1)) & ~((p2)-1))
+//#define OUT_MBOX_CHANNEL SPU_WrOutIntrMbox
+#define OUT_MBOX_CHANNEL SPU_WrOutMbox
+
+#define CHECK_QUEUE_ON_MSG 0 // define to 0 or 1
#define USE_LLR_LOST_EVENT 0 // define to 0 or 1
int gc_sys_tag; // tag for misc DMA operations
@@ -101,7 +105,7 @@ wait_for_ppe_to_be_done_with_comp_info(int idx)
if (p->in_use == 0)
return;
- gc_udelay(5);
+ gc_udelay(1);
} while (1);
}
@@ -143,7 +147,7 @@ flush_completion_info(void)
put_in_progress, ci_idx, comp_info.ncomplete, total_complete);
// send PPE a message
- spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));
+ spu_writech(OUT_MBOX_CHANNEL, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));
ci_idx ^= 0x1; // switch buffers
comp_info.in_use = 1;
@@ -152,6 +156,7 @@ flush_completion_info(void)
// ------------------------------------------------------------------------
+
static unsigned int backoff; // current backoff value in clock cycles
static unsigned int _backoff_start;
static unsigned int _backoff_cap;
@@ -159,6 +164,8 @@ static unsigned int _backoff_cap;
/*
* For 3.2 GHz SPE
*
+ * 10 1023 cycles 320 ns
+ * 11 2047 cycles 640 ns
* 12 4095 cycles 1.3 us
* 13 8191 cycles 2.6 us
* 14 16383 cycles 5.1 us
@@ -173,13 +180,15 @@ static unsigned int _backoff_cap;
static unsigned char log2_backoff_start[16] = {
// 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// -------------------------------------------------------------
- 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16
+//12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11
};
static unsigned char log2_backoff_cap[16] = {
// 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// -------------------------------------------------------------
- 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21
+//17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21
+ 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16
};
static void
@@ -191,24 +200,14 @@ backoff_init(void)
backoff = _backoff_start;
}
+#if !CHECK_QUEUE_ON_MSG
+
static void
backoff_reset(void)
{
backoff = _backoff_start;
}
-#if 0
-
-static void
-backoff_delay(void)
-{
- gc_cdelay(backoff);
-
- // capped exponential backoff
- backoff = ((backoff << 1) + 1) & _backoff_cap;
-}
-
-#else
#define RANDOM_WEIGHT 0.2
@@ -217,15 +216,17 @@ backoff_delay(void)
{
gc_cdelay(backoff);
+ // capped exponential backoff
backoff = ((backoff << 1) + 1);
if (backoff > _backoff_cap)
backoff = _backoff_cap;
+ // plus some randomness
float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5)));
backoff = backoff * (1.0 + r);
}
-#endif
+#endif // !CHECK_QUEUE_ON_MSG
// ------------------------------------------------------------------------
@@ -600,6 +601,7 @@ main_loop(void)
while (1){
+#if !CHECK_QUEUE_ON_MSG
#if (USE_LLR_LOST_EVENT)
if (unlikely(spu_readchcnt(SPU_RdEventStat))){
@@ -619,7 +621,7 @@ main_loop(void)
// by somebody doing something to the queue. Go look and see
// if there's anything for us.
//
- while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
+ while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd) == GCQ_OK)
process_job(jd_ea, &jd);
}
@@ -632,7 +634,7 @@ main_loop(void)
#else
// try to get a job from the job queue
- if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)){
+ if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd) == GCQ_OK){
total_jobs++;
gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);
@@ -645,17 +647,45 @@ main_loop(void)
backoff_delay();
#endif
+#endif
// any msgs for us?
if (unlikely(spu_readchcnt(SPU_RdInMbox))){
int msg = spu_readch(SPU_RdInMbox);
// printf("spu[%d] mbox_msg: 0x%08x\n", spu_args.spu_idx, msg);
+#if CHECK_QUEUE_ON_MSG
+ if (MBOX_MSG_OP(msg) == OP_CHECK_QUEUE){
+
+ while (1){
+ //int delay = (int)(3200.0 * gc_uniform_deviate()); // uniformly in [0, 1.0us]
+ //gc_cdelay(delay);
+
+ gc_dequeue_status_t s =
+ gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd);
+
+ if (s == GCQ_OK){
+ total_jobs++;
+ gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);
+
+ process_job(jd_ea, &jd);
+
+ gc_log_write2(GCL_SS_SYS, 0x11, jd.sys.job_id, total_jobs);
+ }
+ else if (s == GCQ_EMPTY){
+ break;
+ }
+ else { // GCQ_LOCKED -- keep trying
+ }
+ }
+ }
+ else
+#endif
if (MBOX_MSG_OP(msg) == OP_EXIT){
flush_completion_info();
return;
}
- if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){
+ else if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){
spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_SPU_BUFSIZE, GC_SPU_BUFSIZE_BASE));
}
}
@@ -663,7 +693,7 @@ main_loop(void)
// If we've got job completion info for the PPE and we can send a
// message without blocking, do it.
- if (comp_info.ncomplete != 0 && spu_readchcnt(SPU_WrOutIntrMbox) != 0){
+ if (comp_info.ncomplete != 0 && spu_readchcnt(OUT_MBOX_CHANNEL) != 0){
gc_log_write0(GCL_SS_SYS, 0x12);
flush_completion_info();
}
@@ -681,13 +711,6 @@ main(unsigned long long spe_id __attribute__((unused)),
ci_tags = mfc_multi_tag_reserve(2);
put_tags = mfc_multi_tag_reserve(2);
-#if 0
- printf("gc_sys_tag = %d\n", gc_sys_tag);
- printf("get_tag = %d\n", get_tag);
- printf("ci_tags = %d\n", ci_tags);
- printf("put_tags = %d\n", put_tags);
-#endif
-
// dma the args in
mfc_get(&spu_args, argp, sizeof(spu_args), gc_sys_tag, 0, 0);
mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
diff --git a/gcell/lib/runtime/spu/gc_random.c b/gcell/lib/runtime/spu/gc_random.c
index 214309b53..ae2fde875 100644
--- a/gcell/lib/runtime/spu/gc_random.c
+++ b/gcell/lib/runtime/spu/gc_random.c
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2008 Free Software Foundation, Inc.
+ * Copyright 2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -22,9 +22,9 @@
static int last_val = 0;
-#define M 714025 // values from Numerical Recipes in C, 1988
-#define A 4096
-#define C 150889
+# define M 259200 // values from Numerical Recipes in C, 1988
+# define A 7141
+# define C 54773
void
gc_set_seed(int seed)
@@ -32,9 +32,13 @@ gc_set_seed(int seed)
last_val = ((unsigned int) seed) % M;
}
+/*
+ * Return a uniformly distributed value in the range [0, 1.0)
+ * (Linear congruential generator. YMMV. Caveat emptor.)
+ */
float
gc_uniform_deviate(void)
{
last_val = (last_val * A + C) % M;
- return (float) last_val / (float) M;
+ return (float) last_val * (1.0f / (float) M);
}
diff --git a/gcell/lib/runtime/spu/gc_spu_config.h b/gcell/lib/runtime/spu/gc_spu_config.h
index d0b131e82..6320e6dbe 100644
--- a/gcell/lib/runtime/spu/gc_spu_config.h
+++ b/gcell/lib/runtime/spu/gc_spu_config.h
@@ -1,6 +1,6 @@
-/* -*- c++ -*- */
+/* -*- c -*- */
/*
- * Copyright 2008 Free Software Foundation, Inc.
+ * Copyright 2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -24,10 +24,16 @@
#include <gcell/gc_job_desc.h>
#define CACHE_LINE_SIZE 128 // in bytes
-#define GC_SPU_BUFSIZE_BASE (40 * 1024) // must be multiple of CACHE_LINE_SIZE
+
+#if 1
+# define GC_SPU_BUFSIZE_BASE (40 * 1024) // must be multiple of CACHE_LINE_SIZE
+#else
+# define GC_SPU_BUFSIZE_BASE (20 * 1024) // must be multiple of CACHE_LINE_SIZE
+#endif
+
#define GC_SPU_BUFSIZE (GC_SPU_BUFSIZE_BASE + MAX_ARGS_EA * CACHE_LINE_SIZE)
-#define NGETBUFS 1 // single buffer job arg gets
-#define NPUTBUFS 2 // double buffer job arg puts
+#define NGETBUFS 1 // gets are single buffered
+#define NPUTBUFS 2 // puts are double buffered
#endif /* INCLUDED_GCELL_GC_SPU_CONFIG_H */
diff --git a/gcell/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/lib/runtime/spu/gc_spu_jd_queue.c
index 42deac34e..91bb5bc7e 100644
--- a/gcell/lib/runtime/spu/gc_spu_jd_queue.c
+++ b/gcell/lib/runtime/spu/gc_spu_jd_queue.c
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007,2008 Free Software Foundation, Inc.
+ * Copyright 2007,2008,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -29,8 +29,14 @@
extern int gc_sys_tag;
+// keep track of stats
+int jdq_ok;
+int jdq_empty;
+int jdq_locked;
+
+
#define INITIAL_BACKOFF 32.0
-#define MAX_BACKOFF 16384.0
+#define MAX_BACKOFF 8192.0 /* 2.6us */
#define RANDOM_WEIGHT 0.2
static float
@@ -47,7 +53,7 @@ next_backoff(float backoff)
return t;
}
-bool
+gc_dequeue_status_t
gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
int jd_tag, gc_job_desc_t *item)
{
@@ -65,11 +71,15 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
mfc_getllar(local_q, q, 0, 0);
spu_readch(MFC_RdAtomicStat);
- if (local_q->mutex != 0) // somebody else has it locked
- return false;
+ if (local_q->mutex != 0){ // somebody else has it locked
+ jdq_locked++;
+ return GCQ_LOCKED;
+ }
- if (local_q->head == 0) // the queue is empty
- return false;
+ if (local_q->head == 0){ // the queue is empty
+ jdq_empty++;
+ return GCQ_EMPTY;
+ }
// Try to acquire the lock
@@ -108,5 +118,6 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
mfc_putlluc(local_q, q, 0, 0);
spu_readch(MFC_RdAtomicStat);
- return true;
+ jdq_ok++;
+ return GCQ_OK;
}
diff --git a/gcell/lib/wrapper/Makefile.am b/gcell/lib/wrapper/Makefile.am
index 5a8e328c2..0676ebb70 100644
--- a/gcell/lib/wrapper/Makefile.am
+++ b/gcell/lib/wrapper/Makefile.am
@@ -1,5 +1,5 @@
#
-# Copyright 2008 Free Software Foundation, Inc.
+# Copyright 2008,2009 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
@@ -48,12 +48,15 @@ libwrapper_la_LIBADD = \
libwrapper_qa_la_SOURCES = \
qa_gcell_general.cc \
- qa_gcell_wrapper.cc \
- qa_gcp_fft_1d_r2.cc
+ qa_gcell_wrapper.cc
+
+# FFTW now depends on gcell, don't create circular dependency :-)
+# qa_gcp_fft_1d_r2.cc
libwrapper_qa_la_LIBADD = \
- gcell_general_qa.lo \
- -lfftw3f
+ gcell_general_qa.lo
+
+# -lfftw3f
# Headers
diff --git a/gcell/lib/wrapper/qa_gcell_wrapper.cc b/gcell/lib/wrapper/qa_gcell_wrapper.cc
index d53c61057..ccfd2fdee 100644
--- a/gcell/lib/wrapper/qa_gcell_wrapper.cc
+++ b/gcell/lib/wrapper/qa_gcell_wrapper.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2009 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -27,7 +27,7 @@
#include <qa_gcell_wrapper.h>
#include <qa_gcell_general.h>
-#include <qa_gcp_fft_1d_r2.h>
+//#include <qa_gcp_fft_1d_r2.h>
CppUnit::TestSuite *
qa_gcell_wrapper::suite()
@@ -35,7 +35,7 @@ qa_gcell_wrapper::suite()
CppUnit::TestSuite *s = new CppUnit::TestSuite("wrapper");
s->addTest(qa_gcell_general::suite());
- s->addTest(qa_gcp_fft_1d_r2::suite());
+ //s->addTest(qa_gcp_fft_1d_r2::suite());
return s;
}