summaryrefslogtreecommitdiff
path: root/gcell/lib/runtime/spu/gc_main.c
diff options
context:
space:
mode:
authoreb2008-12-22 04:24:34 +0000
committereb2008-12-22 04:24:34 +0000
commit06e7a0313a09ee812061d855a47206ed303eac7f (patch)
tree73314d935e6aa06e60b533610dd034ab100a89ec /gcell/lib/runtime/spu/gc_main.c
parent13b15589f0b98fbd13fa42c31dcfbe2674dd562c (diff)
downloadgnuradio-06e7a0313a09ee812061d855a47206ed303eac7f.tar.gz
gnuradio-06e7a0313a09ee812061d855a47206ed303eac7f.tar.bz2
gnuradio-06e7a0313a09ee812061d855a47206ed303eac7f.zip
Merged eb/gcell-wip2 rev 10130:10152 into trunk.
This makes several gcell related changes. {{{ The first two are INCOMPATIBLE CHANGES: (1) The gcell portion of the code base was reorganized. As part of that reorganization, the paths to the include files changed. They are now installed under PREFIX/include/gcell instead of directly in PREFIX/include. This means that all includes of the form: #include <gc_foo.h> should be changed to #include <gcell/gc_foo.h> (2a) If you are directly using gcell-embedspu-libtool or the $(GCELL_EMBEDSPU_LIBTOOL) variable in your Makefiles, the order of the two command line arguments was switched. It's now $(GCELL_EMBEDSPU_LIBTOOL) input_file output_file or equivalently $(GCELL_EMBEDSPU_LIBTOOL) $< $@ gcell-embedspu-libtool allows you to convert an SPE executable into something that libtool will allow you to add to a host shared library. (2b) The name of the symbol created by gcell-embedspu-libtool is now suffixed with _spx (SPE executable) to reduce the probability of name collision. If you have lines like this: extern spe_program_handle_t gcell_all; in your code, you may have to change them to: extern spe_program_handle_t gcell_all_spx; The following changes are enhancements and shouldn't break any existing code: (3) We now install two new pkg-config files, gcell.pc and gcell_spu.pc. These can be used to assist in building gcell code that lives outside the GNU Radio repository. The first gives the include and library paths for the PPE host code, the second is the same info for the SPE code. There is also a new .m4 macro, GR_GCELL, contained in config/gr_gcell.m4, that uses PKG_CONFIG_MODULES to fish out the relevant variables. If you've got standalone code that uses gcell, you'll probably want to copy this macro (along with our version of pkg.m4) to your tree and use it. 
It sets the following variables: GCELL_CFLAGS GCELL_CPPFLAGS GCELL_INCLUDEDIR GCELL_LIBS GCELL_SPU_CFLAGS GCELL_SPU_CPPFLAGS GCELL_SPU_INCLUDEDIR GCELL_SPU_LIBS GCELL_EMBEDSPU_LIBTOOL (4) make -j now works in the gcell directory (fixes ticket:242). }}} git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@10153 221aa14e-8319-0410-a670-987f0aec2ac5
Diffstat (limited to 'gcell/lib/runtime/spu/gc_main.c')
-rw-r--r--gcell/lib/runtime/spu/gc_main.c708
1 files changed, 708 insertions, 0 deletions
diff --git a/gcell/lib/runtime/spu/gc_main.c b/gcell/lib/runtime/spu/gc_main.c
new file mode 100644
index 000000000..0866c3c3d
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_main.c
@@ -0,0 +1,708 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+// #define ENABLE_GC_LOGGING // define to enable logging
+
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <sync_utils.h>
+#include "gc_spu_config.h"
+#include "spu_buffers.h"
+#include <gcell/gc_spu_args.h>
+#include <gcell/gc_job_desc.h>
+#include <gcell/gc_mbox.h>
+#include <gcell/gc_declare_proc.h>
+#include <gcell/spu/gc_jd_queue.h>
+#include <gcell/spu/gc_random.h>
+#include <gcell/spu/gc_delay.h>
+
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

//! round x down to p2 boundary (p2 must be a power-of-2)
#define ROUND_DN(x, p2) ((x) & ~((p2)-1))

//! round x up to p2 boundary (p2 must be a power-of-2)
#define ROUND_UP(x, p2) (((x)+((p2)-1)) & ~((p2)-1))


#define USE_LLR_LOST_EVENT	0	// define to 0 or 1

int gc_sys_tag;			// tag for misc DMA operations
static gc_spu_args_t spu_args;	// our args, DMA'd in from EA in main()

static struct gc_proc_def *gc_proc_def;	// procedure entry points

// ------------------------------------------------------------------------

// state for DMA'ing arguments in and out

static int get_tag;		// 1 tag for job arg gets
static int put_tags;		// 2 tags for job arg puts (one per put buffer)

static int pb_idx = 0;		// current put buffer index (0 or 1)

// bitmask (bit per put buffer): bit is set if DMA is started but not complete
static int put_in_progress = 0;
#define PBI_MASK(_pbi_) (1 << (_pbi_))

// ------------------------------------------------------------------------

// our working copy of the completion info; the EA side is double-buffered
// (see ci_idx below and flush_completion_info)
static gc_comp_info_t comp_info = {
  .in_use = 1,
  .ncomplete = 0
};

static int ci_idx = 0;		// index of current comp_info
static int ci_tags;		// two consecutive dma tags
+
+// ------------------------------------------------------------------------
+
/*
 * Wait until EA copy of comp_info[idx].in_use is 0.
 *
 * Polls by DMA'ing the 128-byte EA completion-info block into a
 * cache-line-aligned local buffer and checking the in_use flag.
 */
static void
wait_for_ppe_to_be_done_with_comp_info(int idx)
{
  char _tmp[256];
  char *buf = (char *) ALIGN(_tmp, 128);	// get cache-aligned buffer
  gc_comp_info_t *p = (gc_comp_info_t *) buf;

  assert(sizeof(gc_comp_info_t) == 128);

  do {
    // fetch the EA copy and wait for the DMA to complete
    mfc_get(buf, spu_args.comp_info[idx], 128, gc_sys_tag, 0, 0);
    mfc_write_tag_mask(1 << gc_sys_tag);
    mfc_read_tag_status_all();
    if (p->in_use == 0)		// PPE has released the buffer
      return;

    gc_udelay(5);		// back off briefly before polling again

  } while (1);
}
+
/*
 * DMA the accumulated completion info (ids of finished jobs) out to the
 * PPE and notify it via the interrupting mailbox.
 *
 * Before sending the message we wait for the comp_info DMA *and* any
 * outstanding EA argument puts, since the PPE may examine job output as
 * soon as it sees OP_JOBS_DONE.  NOTE: spu_writech on a full interrupting
 * mailbox blocks; main_loop checks channel capacity before calling us in
 * the common case.
 */
static void
flush_completion_info(void)
{
  // events: 0x3X

  static int total_complete = 0;	// running total, for logging only

  if (comp_info.ncomplete == 0)		// nothing to flush
    return;

  // ensure that PPE is done with the buffer we're about to overwrite
  wait_for_ppe_to_be_done_with_comp_info(ci_idx);

  // dma the comp_info out to PPE
  int tag = ci_tags + ci_idx;
  mfc_put(&comp_info, spu_args.comp_info[ci_idx], sizeof(gc_comp_info_t), tag, 0, 0);

  // we need to wait for the completion info to finish, as well as
  // any EA argument puts.

  int tag_mask = 1 << tag;		// the comp_info tag
  if (put_in_progress & PBI_MASK(0))
    tag_mask |= (1 << (put_tags + 0));
  if (put_in_progress & PBI_MASK(1))
    tag_mask |= (1 << (put_tags + 1));

  gc_log_write2(GCL_SS_SYS, 0x30, put_in_progress, tag_mask);

  mfc_write_tag_mask(tag_mask);		// the tags we're interested in
  mfc_read_tag_status_all();		// wait for DMA to complete
  put_in_progress = 0;			// mark them all complete

  total_complete += comp_info.ncomplete;
  gc_log_write4(GCL_SS_SYS, 0x31,
		put_in_progress, ci_idx, comp_info.ncomplete, total_complete);

  // send PPE a message
  spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));

  ci_idx ^= 0x1;		// switch to the other (double-buffered) comp_info
  comp_info.in_use = 1;
  comp_info.ncomplete = 0;
}
+
+// ------------------------------------------------------------------------
+
static unsigned int backoff;		// current backoff value in clock cycles
static unsigned int _backoff_start;	// initial (minimum) backoff, in cycles
static unsigned int _backoff_cap;	// maximum backoff, in cycles

/*
 * Tables of log2(backoff) values, indexed by (nspus - 1): the more SPUs
 * contending for the job queue, the longer the backoff.
 *
 * For 3.2 GHz SPE
 *
 * 12	 4095 cycles    1.3 us
 * 13	 8191 cycles	2.6 us
 * 14	16383 cycles	5.1 us
 * 15	32767 cycles   10.2 us
 * 16		       20.4 us
 * 17		       40.8 us
 * 18		       81.9 us
 * 19		      163.8 us
 * 20		      327.7 us
 * 21		      655.4 us
 */
static unsigned char log2_backoff_start[16] = {
// 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
// -------------------------------------------------------------
   12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16
};

static unsigned char log2_backoff_cap[16] = {
// 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
// -------------------------------------------------------------
   17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21
};
+
+static void
+backoff_init(void)
+{
+ _backoff_cap = (1 << (log2_backoff_cap[(spu_args.nspus - 1) & 0xf])) - 1;
+ _backoff_start = (1 << (log2_backoff_start[(spu_args.nspus - 1) & 0xf])) - 1;
+
+ backoff = _backoff_start;
+}
+
/*
 * Drop the backoff back to its minimum value.  Called from main_loop
 * after a job is successfully dequeued.
 */
static void
backoff_reset(void)
{
  backoff = _backoff_start;
}
+
#if 0

// Plain capped exponential backoff (disabled in favor of the
// randomized version below).
static void
backoff_delay(void)
{
  gc_cdelay(backoff);

  // capped exponential backoff
  backoff = ((backoff << 1) + 1) & _backoff_cap;
}

#else

// fraction of the backoff that is randomly jittered (+/- 20%)
#define RANDOM_WEIGHT	0.2

/*
 * Delay for the current backoff interval, then grow the backoff
 * exponentially (capped) with random jitter so contending SPUs
 * don't stay in lock-step.
 */
static void
backoff_delay(void)
{
  gc_cdelay(backoff);

  backoff = ((backoff << 1) + 1);
  if (backoff > _backoff_cap)
    backoff = _backoff_cap;

  // r is uniform in [-RANDOM_WEIGHT, +RANDOM_WEIGHT]
  float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5)));
  backoff = backoff * (1.0 + r);
}

#endif
+
+// ------------------------------------------------------------------------
+
/*
 * Return an unsigned mask with the low @nbits bits set.
 *
 * @param nbits  number of low-order bits to set; must be in [0, 31]
 *               (shifting by >= the width of the type would be UB).
 *
 * Note: the shift operand is the unsigned literal ~0u.  The previous
 * form, ~0 << nbits, left-shifted the signed value -1, which is
 * undefined behavior in C (C11 6.5.7p4); the unsigned shift is well
 * defined and produces the same bit pattern.
 */
static inline unsigned int
make_mask(int nbits)
{
  return ~(~0u << nbits);
}
+
// State for the divide-and-conquer put generator below.
static unsigned int dc_work;		// bitmask: bit i set => byte at offset i needs xfer
static int dc_put_tag;			// DMA tag to issue the puts with
static unsigned char *dc_ls_base;	// 8-byte-rounded-down LS base address
static gc_eaddr_t dc_ea_base;		// 8-byte-rounded-down EA base address

// divide and conquer: recursively find maximal aligned power-of-2 runs
// of set bits in dc_work and emit one mfc_put per run.
static void
d_and_c(unsigned int offset, unsigned int len)
{
  unsigned int mask = make_mask(len) << offset;
  unsigned int t = mask & dc_work;
  if (t == 0)		// nothing to do
    return;
  if (t == mask){	// got a match, generate dma
    mfc_put(dc_ls_base + offset, dc_ea_base + offset, len, dc_put_tag, 0, 0);
  }
  else {		// bisect
    len >>= 1;
    d_and_c(offset, len);
    d_and_c(offset + len, len);
  }
}
+
// Handle the nasty case of a dma xfer that's less than 16 bytes long.
// len is guaranteed to be in [1, 15].
// NOTE(review): assumes ls and ea are co-aligned mod 8 so the rounded-down
// bases line up — process_job's put-buffer layout provides this (it asserts
// co-alignment of ls_addr and ea_addr); verify before reusing elsewhere.

static void
handle_slow_and_tedious_dma(gc_eaddr_t ea, unsigned char *ls,
			    unsigned int len, int put_tag)
{
  // Set up for divide and conquer
  unsigned int alignment = ((uintptr_t) ls) & 0x7;
  dc_work = make_mask(len) << alignment;
  dc_ls_base = (unsigned char *) ROUND_DN((uintptr_t) ls, 8);
  dc_ea_base = ROUND_DN(ea, (gc_eaddr_t) 8);
  dc_put_tag = put_tag;

  // len <= 15 and alignment <= 7, so every work bit lies in [0, 24)
  d_and_c( 0, 8);
  d_and_c( 8, 8);
  d_and_c(16, 8);
}
+
+
/*
 * Execute one job described by *jd (already DMA'd into LS; jd_ea is its
 * EA home).  DMAs any EA arguments in, runs the user procedure, DMAs
 * "put" arguments back out, writes the updated job descriptor back to
 * EA, and queues the job id for completion reporting.
 */
static void
process_job(gc_eaddr_t jd_ea, gc_job_desc_t *jd)
{
  // events: 0x2X

  jd->status = JS_OK;		// assume success

  if (jd->proc_id >= spu_args.nproc_defs)
    jd->status = JS_UNKNOWN_PROC;

  else {

    if (jd->eaa.nargs == 0)
      (*gc_proc_def[jd->proc_id].proc)(&jd->input, &jd->output, &jd->eaa);

    else {	// handle EA args that must be DMA'd in/out

      gc_job_ea_args_t *eaa = &jd->eaa;

      int NELMS =
	MAX(MAX_ARGS_EA,
	    (GC_SPU_BUFSIZE + MFC_MAX_DMA_SIZE - 1) / MFC_MAX_DMA_SIZE);

      mfc_list_element_t dma_get_list[NELMS];
      //mfc_list_element_t dma_put_list[NELMS];

      memset(dma_get_list, 0, sizeof(dma_get_list));
      //memset(dma_put_list, 0, sizeof(dma_put_list));

      int gli = 0;		// get list index
      //int pli = 0;		// put list index

      unsigned char *get_base = _gci_getbuf[0];
      unsigned char *get_t = get_base;
      unsigned int total_get_dma_len = 0;

      unsigned char *put_base = _gci_putbuf[pb_idx];
      unsigned char *put_t = put_base;
      unsigned int total_put_alloc = 0;
      int put_tag = put_tags + pb_idx;

      // Do we have any "put" args?  If so ensure that previous
      // dma from this buffer is complete

      gc_log_write2(GCL_SS_SYS, 0x24, put_in_progress, jd->sys.direction_union);

      if ((jd->sys.direction_union & GCJD_DMA_PUT)
	  && (put_in_progress & PBI_MASK(pb_idx))){

	gc_log_write2(GCL_SS_SYS, 0x25, put_in_progress, 1 << put_tag);

	mfc_write_tag_mask(1 << put_tag);	// the tag we're interested in
	mfc_read_tag_status_all();		// wait for DMA to complete
	put_in_progress &= ~(PBI_MASK(pb_idx));

	gc_log_write1(GCL_SS_SYS, 0x26, put_in_progress);
      }


      // for now, all EA's must have the same high 32-bits
      gc_eaddr_t common_ea = eaa->arg[0].ea_addr;


      // assign LS addresses for buffers

      for (unsigned int i = 0; i < eaa->nargs; i++){

	gc_eaddr_t ea_base = 0;
	unsigned char *ls_base;
	int offset;
	unsigned int dma_len;

	if (eaa->arg[i].direction == GCJD_DMA_GET){
	  // GET args: round the EA range out to full cache lines; we may
	  // fetch extra bytes but we only hand the caller the aligned view.
	  ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE);
	  offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1);
	  dma_len = ROUND_UP(eaa->arg[i].get_size + offset, CACHE_LINE_SIZE);
	  total_get_dma_len += dma_len;

	  if (total_get_dma_len > GC_SPU_BUFSIZE){
	    jd->status = JS_ARGS_TOO_LONG;
	    goto wrap_up;
	  }

	  ls_base = get_t;
	  get_t += dma_len;
	  eaa->arg[i].ls_addr = ls_base + offset;

	  if (0){
	    assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f) == ((intptr_t)eaa->arg[i].ls_addr & 0x7f));
	    assert((ea_base & 0x7f) == 0);
	    assert(((intptr_t)ls_base & 0x7f) == 0);
	    assert((dma_len & 0x7f) == 0);
	    assert((eaa->arg[i].get_size <= dma_len)
		   && dma_len <= (eaa->arg[i].get_size + offset + CACHE_LINE_SIZE - 1));
	  }

	  // add to dma get list, splitting at the max single-DMA size
	  // FIXME (someday) the dma list is where the JS_BAD_EAH limitation comes from

	  while (dma_len != 0){
	    int n = MIN(dma_len, MFC_MAX_DMA_SIZE);
	    dma_get_list[gli].size = n;
	    dma_get_list[gli].eal = mfc_ea2l(ea_base);
	    dma_len -= n;
	    ea_base += n;
	    gli++;
	  }
	}

	else if (eaa->arg[i].direction == GCJD_DMA_PUT){
	  //
	  // This case is trickier than the GET case since we can't
	  // write outside of the bounds of the user provided buffer.
	  // We still align the buffers to 128-bytes for good performance
	  // in the middle portion of the xfers.
	  //
	  ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE);
	  offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1);

	  uint32_t ls_alloc_len =
	    ROUND_UP(eaa->arg[i].put_size + offset, CACHE_LINE_SIZE);

	  total_put_alloc += ls_alloc_len;

	  if (total_put_alloc > GC_SPU_BUFSIZE){
	    jd->status = JS_ARGS_TOO_LONG;
	    goto wrap_up;
	  }

	  ls_base = put_t;
	  put_t += ls_alloc_len;
	  eaa->arg[i].ls_addr = ls_base + offset;

	  if (1){
	    assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f)
		   == ((intptr_t)eaa->arg[i].ls_addr & 0x7f));
	    assert((ea_base & 0x7f) == 0);
	    assert(((intptr_t)ls_base & 0x7f) == 0);
	  }
	}

	else
	  assert(0);
      }

      // fire off the dma to fetch the args and wait for it to complete
      mfc_getl(get_base, common_ea, dma_get_list, gli*sizeof(dma_get_list[0]), get_tag, 0, 0);
      mfc_write_tag_mask(1 << get_tag);		// the tag we're interested in
      mfc_read_tag_status_all();		// wait for DMA to complete

      // do the work
      (*gc_proc_def[jd->proc_id].proc)(&jd->input, &jd->output, &jd->eaa);


      // Do we have any "put" args?  If so copy them out
      if (jd->sys.direction_union & GCJD_DMA_PUT){

	// Do the copy out using single DMA xfers.  The LS ranges
	// aren't generally contiguous.

	bool started_dma = false;

	for (unsigned int i = 0; i < eaa->nargs; i++){
	  if (eaa->arg[i].direction == GCJD_DMA_PUT && eaa->arg[i].put_size != 0){

	    started_dma = true;

	    gc_eaddr_t ea;
	    unsigned char *ls;
	    unsigned int len;

	    ea = eaa->arg[i].ea_addr;
	    ls = (unsigned char *) eaa->arg[i].ls_addr;
	    len = eaa->arg[i].put_size;

	    if (len < 16)
	      handle_slow_and_tedious_dma(ea, ls, len, put_tag);

	    else {
	      if ((ea & 0xf) != 0){

		// printf("1: ea = 0x%x  len = %5d\n", (int) ea, len);

		// handle the "pre-multiple-of-16" portion
		// do 1, 2, 4, or 8 byte xfers as required

		if (ea & 0x1){		// do a 1-byte xfer
		  mfc_put(ls, ea, 1, put_tag, 0, 0);
		  ea += 1;
		  ls += 1;
		  len -= 1;
		}
		if (ea & 0x2){		// do a 2-byte xfer
		  mfc_put(ls, ea, 2, put_tag, 0, 0);
		  ea += 2;
		  ls += 2;
		  len -= 2;
		}
		if (ea & 0x4){		// do a 4-byte xfer
		  mfc_put(ls, ea, 4, put_tag, 0, 0);
		  ea += 4;
		  ls += 4;
		  len -= 4;
		}
		if (ea & 0x8){		// do an 8-byte xfer
		  mfc_put(ls, ea, 8, put_tag, 0, 0);
		  ea += 8;
		  ls += 8;
		  len -= 8;
		}
	      }

	      if (1){
		// printf("2: ea = 0x%x  len = %5d\n", (int) ea, len);
		assert((ea & 0xf) == 0);
		assert((((intptr_t) ls) & 0xf) == 0);
	      }

	      // handle the "multiple-of-16" portion

	      int aligned_len = ROUND_DN(len, 16);
	      len = len & (16 - 1);

	      while (aligned_len != 0){
		int dma_len = MIN(aligned_len, MFC_MAX_DMA_SIZE);
		mfc_put(ls, ea, dma_len, put_tag, 0, 0);
		ea += dma_len;
		ls += dma_len;
		aligned_len -= dma_len;
	      }

	      if (1){
		// printf("3: ea = 0x%x  len = %5d\n", (int)ea, len);
		assert((ea & 0xf) == 0);
		assert((((intptr_t) ls) & 0xf) == 0);
	      }

	      // handle "post-multiple-of-16" portion

	      if (len != 0){

		if (len >= 8){		// do an 8-byte xfer
		  mfc_put(ls, ea, 8, put_tag, 0, 0);
		  ea += 8;
		  ls += 8;
		  len -= 8;
		}
		if (len >= 4){		// do a 4-byte xfer
		  mfc_put(ls, ea, 4, put_tag, 0, 0);
		  ea += 4;
		  ls += 4;
		  len -= 4;
		}
		if (len >= 2){		// do a 2-byte xfer
		  mfc_put(ls, ea, 2, put_tag, 0, 0);
		  ea += 2;
		  ls += 2;
		  len -= 2;
		}
		if (len >= 1){		// do a 1-byte xfer
		  mfc_put(ls, ea, 1, put_tag, 0, 0);
		  ea += 1;
		  ls += 1;
		  len -= 1;
		}
		if (1)
		  assert(len == 0);
	      }
	    }
	  }
	}
	if (started_dma){
	  put_in_progress |= PBI_MASK(pb_idx);	// note it's running
	  gc_log_write2(GCL_SS_SYS, 0x27, put_in_progress, pb_idx);
	  pb_idx ^= 1;				// toggle current buffer
	}
      }
    }
  }

 wrap_up:;	// semicolon creates null statement for C99 compliance

  // Copy job descriptor back out to EA.
  // (The dma will be waited on in flush_completion_info)
  int tag = ci_tags + ci_idx;		// use the current completion tag
  mfc_put(jd, jd_ea, sizeof(*jd), tag, 0, 0);

  // Tell PPE we're done with the job.
  //
  // We queue these up until we run out of room, or until we can send
  // the info to the PPE w/o blocking.  The blocking check is in
  // main_loop

  comp_info.job_id[comp_info.ncomplete++] = jd->sys.job_id;

  if (comp_info.ncomplete == GC_CI_NJOBS){
    gc_log_write0(GCL_SS_SYS, 0x28);
    flush_completion_info();
  }
}
+
/*
 * Main dispatch loop: dequeue jobs from the EA job queue and run them,
 * service incoming mailbox messages, and push completion info to the
 * PPE when it can be done without blocking.  Returns on OP_EXIT.
 */
static void
main_loop(void)
{
  // events: 0x1X

  static gc_job_desc_t jd;	// static gets us proper alignment
  gc_eaddr_t jd_ea;
  int total_jobs = 0;

#if (USE_LLR_LOST_EVENT)
  // setup events
  spu_writech(SPU_WrEventMask, MFC_LLR_LOST_EVENT);

  // prime the pump
  while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
    process_job(jd_ea, &jd);
  // we're now holding a lock-line reservation
#endif

  while (1){

#if (USE_LLR_LOST_EVENT)

    if (unlikely(spu_readchcnt(SPU_RdEventStat))){
      //
      // execute standard event handling prologue
      //
      int status = spu_readch(SPU_RdEventStat);
      int mask = spu_readch(SPU_RdEventMask);
      spu_writech(SPU_WrEventMask, mask & ~status);	// disable active events
      spu_writech(SPU_WrEventAck, status);		// ack active events

      // execute per-event actions

      if (status & MFC_LLR_LOST_EVENT){
	//
	// We've lost a line reservation.  This is most likely caused
	// by somebody doing something to the queue.  Go look and see
	// if there's anything for us.
	//
	while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
	  process_job(jd_ea, &jd);
      }

      //
      // execute standard event handling epilogue
      //
      spu_writech(SPU_WrEventMask, mask);	// restore event mask
    }

#else

    // try to get a job from the job queue
    if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)){
      total_jobs++;
      gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);

      process_job(jd_ea, &jd);

      gc_log_write2(GCL_SS_SYS, 0x11, jd.sys.job_id, total_jobs);
      backoff_reset();		// found work: drop back to minimum delay
    }
    else
      backoff_delay();		// queue empty: randomized exponential backoff

#endif

    // any msgs for us?

    if (unlikely(spu_readchcnt(SPU_RdInMbox))){
      int msg = spu_readch(SPU_RdInMbox);
      // printf("spu[%d] mbox_msg: 0x%08x\n", spu_args.spu_idx, msg);
      if (MBOX_MSG_OP(msg) == OP_EXIT){
	flush_completion_info();	// don't lose completions on shutdown
	return;
      }
      if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){
	spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_SPU_BUFSIZE, GC_SPU_BUFSIZE_BASE));
      }
    }

    // If we've got job completion info for the PPE and we can send a
    // message without blocking, do it.

    if (comp_info.ncomplete != 0 && spu_readchcnt(SPU_WrOutIntrMbox) != 0){
      gc_log_write0(GCL_SS_SYS, 0x12);
      flush_completion_info();
    }
  }
}
+
+
/*
 * SPU entry point.
 *
 * argp is the EA of a gc_spu_args_t set up by the PPE; we DMA it in to
 * learn our job queue, completion buffers, SPU index, procedure table
 * location, and logging setup, then enter the dispatch loop.
 */
int
main(unsigned long long spe_id __attribute__((unused)),
     unsigned long long argp,
     unsigned long long envp __attribute__((unused)))
{
  // reserve the DMA tags used throughout this file
  gc_sys_tag = mfc_tag_reserve();	// allocate a tag for our misc DMA operations
  get_tag = mfc_tag_reserve();
  ci_tags = mfc_multi_tag_reserve(2);
  put_tags = mfc_multi_tag_reserve(2);

#if 0
  printf("gc_sys_tag = %d\n", gc_sys_tag);
  printf("get_tag = %d\n", get_tag);
  printf("ci_tags = %d\n", ci_tags);
  printf("put_tags = %d\n", put_tags);
#endif

  // dma the args in
  mfc_get(&spu_args, argp, sizeof(spu_args), gc_sys_tag, 0, 0);
  mfc_write_tag_mask(1 << gc_sys_tag);	// the tag we're interested in
  mfc_read_tag_status_all();		// wait for DMA to complete

  // initialize pointer to procedure entry table
  gc_proc_def = (gc_proc_def_t *) spu_args.proc_def_ls_addr;

  gc_set_seed(spu_args.spu_idx);	// per-SPU seed for the backoff jitter

  // initialize logging
  _gc_log_init(spu_args.log);

  backoff_init();		// initialize backoff parameters

  main_loop();
  return 0;
}