43 files changed, 6097 insertions, 0 deletions
diff --git a/gcell/lib/Makefile.am b/gcell/lib/Makefile.am
new file mode 100644
index 000000000..e7b349331
--- /dev/null
+++ b/gcell/lib/Makefile.am
@@ -0,0 +1,50 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+SUBDIRS = spu runtime general wrapper .
+
+# generate libgcell.la from the convenience libraries in subdirs
+
+lib_LTLIBRARIES = libgcell.la libgcell-qa.la
+
+libgcell_la_SOURCES = 
+libgcell_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+
+libgcell_qa_la_SOURCES = 
+libgcell_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+
+libgcell_la_LIBADD = \
+	runtime/libruntime.la \
+	wrapper/libwrapper.la \
+	-lspe2 \
+	$(OMNITHREAD_LA)
+
+libgcell_qa_la_LIBADD = \
+	runtime/libruntime-qa.la \
+	wrapper/libwrapper-qa.la \
+	$(CPPUNIT_LIBS)
+
+
+
+
+
+
diff --git a/gcell/lib/general/Makefile.am b/gcell/lib/general/Makefile.am
new file mode 100644
index 000000000..bd5a4de62
--- /dev/null
+++ b/gcell/lib/general/Makefile.am
@@ -0,0 +1,23 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+
diff --git a/gcell/lib/general/spu/fft_1d_r2.c b/gcell/lib/general/spu/fft_1d_r2.c
new file mode 100644
index 000000000..0a87e74a8
--- /dev/null
+++ b/gcell/lib/general/spu/fft_1d_r2.c
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/spu/libfft.h>
+#include <gcell/spu/fft_1d_r2.h>
+#include <assert.h>
+
+/*
+ * Out-of-line wrapper: sanity-check log2_size, then invoke the inline version.
+ */
+void 
+fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size)
+{
+  assert(log2_size >= 0 && log2_size < (int)(8 * sizeof(int)) - 1);	// keeps the shift below well-defined
+  assert((1 << log2_size) <= MAX_FFT_1D_SIZE);
+  _fft_1d_r2(out, in, W, log2_size);
+}
diff --git a/gcell/lib/general/spu/memset.S b/gcell/lib/general/spu/memset.S
new file mode 100644
index 000000000..39eabce02
--- /dev/null
+++ b/gcell/lib/general/spu/memset.S
@@ -0,0 +1,185 @@
+/* -*- asm -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/spu/gc_spu_macs.h>
+
+	.file "memset.S"
+
+	/*
+	 * Computes this, only a lot faster...
+	 *
+	 *	void *
+	 *	memset(void *pv, int c, size_t n)
+	 *	{
+	 *	  unsigned char *p = (unsigned char *) pv;
+	 *	  size_t i;
+	 *	  for (i = 0; i < n; i++)
+	 *	    p[i] = c;
+	 *	
+	 *	  return pv;
+	 *	}
+	 */
+	
+#define	p_arg	arg1	// we're going to clobber arg1 w/ the return value
+#define	c	arg2	// the constant we're writing
+#define	n	arg3	// how many bytes to write
+
+#define	p	r13	// where we're writing
+#define	t0	r14
+#define t1	r15
+#define	mask	r16
+#define	old	r17
+#define an	r18	// aligned n (n rounded down to mod 16 boundary)
+#define	next_p	r19
+#define	cond1	r20
+#define	cond2	r21				
+#define m	r22
+#define r	r23
+	
+	PROC_ENTRY(memset)
+	
+	// Hint the return from do_head, in case we go that way.
+	// There's pretty much nothing we can do to hint the branch to it.
+	hbrr	do_head_br, head_complete
+	
+	MR(p, p_arg)	// leaves p, the return value, in the correct reg (r3)
+	BRZ_RETURN(n)
+
+	MODULO(t0, p, 16)	// is p%16 == 0?
+	VSPLTB(c, c, 3)		// splat byte in preferred slot of c into all slots
+	brnz	t0, do_head	// no, handle it
+head_complete:
+
+	/*
+	 * preconditions:	
+	 *   p%16 == 0, n > 0
+	 */
+	hbrr	middle_loop_br, middle_loop
+	
+	ROUND_DOWN(an, n, 16)	// an is "aligned n"
+	MODULO(n, n, 16)	// what's left over in the last quad
+	brz	an, do_tail	// no whole quad words; skip to tail
+	clgti	t0, an, 127	// an >= 128?
+	brz	t0, middle2	// nope, go handle the cases between 0 and 112
+
+	/*
+	 * 128 bytes / iteration
+	 */
+	.p2align 4
+middle_loop:
+	ai	an, an, -128
+	  stqd	c,  0*16(p)
+	ai	next_p, p, 128
+	  stqd	c,  1*16(p)
+	cgti	cond1, an, 127
+	  stqd	c,  2*16(p)
+
+	  stqd	c,  3*16(p)
+	  stqd	c,  4*16(p)
+	  stqd	c,  5*16(p)
+	  stqd	c,  6*16(p)
+	
+	MR(p, next_p)
+	  stqd	c,  7*16-128(next_p)
+	or	cond2, n, an
+middle_loop_br:
+	  brnz	cond1, middle_loop
+	
+	/*
+	 * if an and n are both zero, return now 
+	 */
+	BRZ_RETURN(cond2)
+
+	/*
+	 * otherwise handle last of full quad words 
+	 *
+	 *   0 <= an < 128, p%16 == 0
+	 */
+middle2:
+	/*
+	 * if an == 0, go handle the final non-full quadword
+	 */
+	brz	an, do_tail
+	hbrr	middle2_loop_br, middle2_loop
+	
+	.p2align 3
+middle2_loop:	
+	ai	next_p, p, 16
+	  stqd	c, 0(p)
+	ai	an, an, -16
+	  LMR(p, next_p)
+middle2_loop_br:
+	  brnz	an, middle2_loop
+	
+	/* We're done with the full quadwords. */
+	
+	/*
+	 * Handle the final partial quadword.
+	 * We'll be modifying only the left hand portion of the quad.
+	 *
+	 * preconditions:
+	 *   an == 0, 0 <= n < 16, p%16 == 0
+	 */
+do_tail:
+	HINT_RETURN(do_tail_ret)
+	il	mask, -1
+	sfi	t1, n, 16		// t1 = 16 - n
+	lqd	old, 0(p)
+	shlqby  mask, mask, t1
+	selb	t0, old, c, mask
+	stqd	t0, 0(p)
+do_tail_ret:	
+	RETURN()
+
+	/*
+	 * ----------------------------------------------------------------
+	 * Handle the first partial quadword
+	 *
+	 * preconditions:
+	 *   p%16 != 0
+	 *
+         * postconditions:
+         *   p%16 == 0 or n == 0
+         *
+         *        |-- m --|
+         *     +----------------+----------------+
+         *     |  ////////      |                |
+         *     +----------------+----------------+
+         *        |----- r -----|
+         *        p
+         * ----------------------------------------------------------------
+	 */
+do_head:
+	lqd	old, 0(p)
+	MODULO_NEG(r, p, 16)
+	il	mask, -1
+	UMIN(m, r, n)
+	shlqby	mask, mask, m	// 1's in the top, m*8 0's in the bottom
+	MR(t1, p)
+	sf	t0, m, r	// t0 = r - m
+	a	p, p, m		// p += m
+	rotqby	mask, mask, t0	// rotate 0's to the right place	
+	sf	n, m, n		// n -= m
+	selb	t0, c, old, mask // merge
+	stqd	t0, 0(t1)
+	BRZ_RETURN(n)
+do_head_br:
+	br	head_complete
diff --git a/gcell/lib/general/spu/qa_memset.c b/gcell/lib/general/spu/qa_memset.c
new file mode 100644
index 000000000..e51b02c9a
--- /dev/null
+++ b/gcell/lib/general/spu/qa_memset.c
@@ -0,0 +1,201 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_declare_proc.h>
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <string.h>
+#include <stdio.h>
+
+
+#define MAX_QA_BYTES  1024
+#define	MAX_OFFSET    32
+#define	ALIGNMENT     16
+#define	K	      0xA5
+
+// FIXME should be passed at gcell init time
+//static const int TIMEBASE = 79800000; // ps3
+static const int TIMEBASE = 26666666; // qs21
+
+typedef void* (*memset_fptr)(void *s, int val, size_t n);
+
+void *
+memset_ref(void *sv, int c, size_t n)
+{
+  // Reference implementation: store (unsigned char) c into each of the n bytes.
+  unsigned char *dst = (unsigned char *) sv;
+  const unsigned char fill = (unsigned char) c;
+  while (n-- > 0)
+    *dst++ = fill;
+  return sv;
+}
+
+static bool
+check_before(unsigned char *buf, size_t len, size_t offset)
+{
+  unsigned char *p = buf + sizeof(vector unsigned char) + offset;
+  bool ok = true;
+  int i;
+  // The 16 guard bytes ahead of p must still equal their offset from buf (mod 256).
+  for (i = -16; i < 0; i++){
+    unsigned char expected = (&p[i] - buf) & 0xff;
+    if (p[i] != expected){
+      printf("b:memset(%p, 0x%x, %zu) <offset %2zd> [%3d] expected %02x, got %02x\n",
+	     p, K, len, offset, i, expected, p[i]);	// report the guard pattern, not K
+      ok = false;
+    }
+  }
+  return ok;
+}
+
+static bool
+check_middle(unsigned char *buf, size_t len, size_t offset)
+{
+  // Every byte of the len-byte region handed to memset must now equal K.
+  unsigned char *region = buf + sizeof(vector unsigned char) + offset;
+  bool all_match = true;
+  size_t k;
+  for (k = 0; k != len; ++k){
+    const unsigned char expected = K;
+    if (region[k] == expected)
+      continue;
+    printf("m:memset(%p, 0x%x, %zu) <offset %2zd> [%3zd] expected %02x, got %02x\n",
+	     region, K, len, offset, k, expected, region[k]);
+    all_match = false;
+  }
+  return all_match;
+}
+
+static bool
+check_after(unsigned char *buf, size_t len, size_t offset)
+{
+  // The 16 bytes following the filled region must still equal their offset from buf (mod 256).
+  unsigned char *region = buf + sizeof(vector unsigned char) + offset;
+  bool intact = true;
+  size_t k;
+  for (k = len; k < len + 16; k++){
+    unsigned char expected = (&region[k] - buf) & 0xff;
+    if (region[k] == expected)
+      continue;
+    printf("a:memset(%p, 0x%x, %zu) <offset %2zd> [%3zd] expected %02x, got %02x\n",
+	     region, K, len, offset, k, expected, region[k]);
+    intact = false;
+  }
+  return intact;
+}
+
+
+static bool
+test_memset_aux(memset_fptr f,
+		unsigned char *buf, size_t buflen, size_t len, size_t offset)
+{
+  // Target region: one quadword (16 bytes) plus offset into buf, so that
+  // damage to the bytes just ahead of the region can be detected.
+  unsigned char *target = buf + sizeof(vector unsigned char) + offset;
+  size_t idx;
+
+  // Seed the whole buffer with a known pattern: each byte holds its own
+  // index truncated to 8 bits.
+  for (idx = 0; idx != buflen; ++idx)
+    buf[idx] = idx;
+
+  f(target, K, len);
+
+  // Run all three checks unconditionally so every mismatch gets reported.
+  bool before_ok = check_before(buf, len, offset);
+  bool middle_ok = check_middle(buf, len, offset);
+  bool after_ok  = check_after(buf, len, offset);
+
+  return before_ok & middle_ok & after_ok;
+}
+
+bool
+test_memset(memset_fptr f)
+{
+  size_t BUFLEN = MAX_QA_BYTES + 2*sizeof(vector unsigned char) + MAX_OFFSET;
+  unsigned char unaligned_buf[BUFLEN + ALIGNMENT -1];
+  unsigned char *aligned_buf =
+    (unsigned char *)((((intptr_t) unaligned_buf) + ALIGNMENT - 1) & -ALIGNMENT);
+
+  // Exercise f for every length 0..MAX_QA_BYTES at every offset 0..MAX_OFFSET;
+  // BUFLEN reserves a guard quadword on each side of the largest such region.
+
+  size_t len;
+  size_t offset;
+  bool ok = true;
+
+  for (len = 0; len <= MAX_QA_BYTES; len++){	// inclusive: the largest size is tested too
+    for (offset = 0; offset <= MAX_OFFSET; offset++){
+      ok &= test_memset_aux(f, aligned_buf, BUFLEN, len, offset);
+    }
+  }
+
+  return ok;
+}
+
+// returns bytes/s (4 timed calls of nbytes each; decrementer ticks scaled by TIMEBASE)
+float
+benchmark_memset(memset_fptr f, bool aligned)
+{
+  static const int SIZE = 32768;
+  unsigned char buf[SIZE];	// NOTE(review): no alignment attribute; "aligned" assumes buf lands on a 16-byte boundary -- confirm
+  uint32_t	t0, t1;
+  int		nbytes;
+
+  spu_write_decrementer(0xffffffff);	// load the decrementer with 0xffffffff before timing
+
+  if (aligned){
+    nbytes = SIZE;
+    t0 = spu_read_decrementer();
+    (*f)(buf, 0x55, nbytes);
+    (*f)(buf, 0x55, nbytes);
+    (*f)(buf, 0x55, nbytes);
+    (*f)(buf, 0x55, nbytes);
+    t1 = spu_read_decrementer();
+  }
+  else {
+    nbytes = SIZE - 2;			// fill starts at buf + 1 and stops one byte short of the end
+    t0 = spu_read_decrementer();
+    (*f)(buf + 1, 0x55, nbytes);
+    (*f)(buf + 1, 0x55, nbytes);
+    (*f)(buf + 1, 0x55, nbytes);
+    (*f)(buf + 1, 0x55, nbytes);
+    t1 = spu_read_decrementer();
+  }
+
+  // Elapsed ticks are t0 - t1 (presumably the decrementer counts down -- confirm).
+  return (float) nbytes * 4 / ((t0 - t1) * 1.0/TIMEBASE);
+}
+
+/*
+ * Standard QA stub: takes no input arguments and reports a single bool
+ * result in output->arg[0].u32.
+ */
+static void
+gcs_qa_memset(const gc_job_direct_args_t *input _UNUSED,
+	      gc_job_direct_args_t *output,
+	      const gc_job_ea_args_t *eaa _UNUSED)
+{
+  // Run the full QA sweep against memset and publish the verdict.
+  output->arg[0].u32 = test_memset(memset);
+}
+
+GC_DECLARE_PROC(gcs_qa_memset, "qa_memset");
diff --git a/gcell/lib/runtime/Makefile.am b/gcell/lib/runtime/Makefile.am
new file mode 100644
index 000000000..2c653918e
--- /dev/null
+++ b/gcell/lib/runtime/Makefile.am
@@ -0,0 +1,66 @@
+#
+# Copyright 2007,2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+IBM_PPU_SYNC_INCLUDES = -I$(top_srcdir)/gcell/ibm/sync/ppu_source
+
+
+AM_CPPFLAGS = $(DEFINES) $(OMNITHREAD_INCLUDES) $(MBLOCK_INCLUDES) $(CPPUNIT_INCLUDES) \
+	$(GCELL_INCLUDES) $(IBM_PPU_SYNC_INCLUDES) $(WITH_INCLUDES)
+
+
+dist_bin_SCRIPTS = gcell-embedspu-libtool
+
+noinst_LTLIBRARIES = libruntime.la libruntime-qa.la
+
+libruntime_la_SOURCES = \
+	gc_aligned_alloc.cc \
+	gc_job_manager.cc \
+	gc_job_manager_impl.cc \
+	gc_jd_queue.c \
+	gc_jd_stack.c \
+	gc_proc_def_utils.cc
+
+libruntime_qa_la_SOURCES = \
+	qa_gcell_runtime.cc \
+	qa_jd_queue.cc \
+	qa_jd_stack.cc \
+	qa_job_manager.cc
+
+
+noinst_HEADERS = \
+	gc_client_thread_info.h \
+	gc_job_manager_impl.h \
+	gc_proc_def_utils.h \
+	qa_jd_queue.h \
+	qa_jd_stack.h \
+	qa_job_manager.h \
+	qa_gcell_runtime.h
+
+# generate a libtool.lo that contains an embedded SPU executable
+gcell_runtime_qa.lo: ../spu/gcell_runtime_qa
+	$(GCELL_EMBEDSPU_LIBTOOL) $< $@
+
+libruntime_qa_la_LIBADD = \
+	gcell_runtime_qa.lo \
+	libruntime.la
+
+CLEANFILES = gcell_runtime_qa.lo
diff --git a/gcell/lib/runtime/gc_aligned_alloc.cc b/gcell/lib/runtime/gc_aligned_alloc.cc
new file mode 100644
index 000000000..905154e3d
--- /dev/null
+++ b/gcell/lib/runtime/gc_aligned_alloc.cc
@@ -0,0 +1,55 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <gcell/gc_aligned_alloc.h>
+#include <stdlib.h>
+#include <stdexcept>
+#include <string.h>
+
+// Deleter functor for smart pointers whose storage must be released with free()
+class free_deleter {
+public:
+  void operator()(void *block) {
+    free(block);
+  }
+};
+
+void *
+gc_aligned_alloc(size_t size, size_t alignment)
+{
+  void *p = 0;
+  int r = posix_memalign(&p, alignment, size);	// reports failure via its return value, not errno
+  if (r != 0){
+    fprintf(stderr, "posix_memalign: %s\n", strerror(r));	// same shape as perror output
+    throw std::runtime_error("memory");
+  }
+  return memset(p, 0, size);	// zero the memory; memset returns p
+}
+
+boost::shared_ptr<void>
+gc_aligned_alloc_sptr(size_t size, size_t alignment)
+{
+  void *raw = gc_aligned_alloc(size, alignment);	// zero-filled aligned block
+  return boost::shared_ptr<void>(raw, free_deleter());	// free()d when the last owner goes away
+}
diff --git a/gcell/lib/runtime/gc_client_thread_info.h b/gcell/lib/runtime/gc_client_thread_info.h
new file mode 100644
index 000000000..fbb35d966
--- /dev/null
+++ b/gcell/lib/runtime/gc_client_thread_info.h
@@ -0,0 +1,81 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GC_CLIENT_THREAD_INFO_H
+#define INCLUDED_GC_CLIENT_THREAD_INFO_H
+
+#include <omnithread.h>
+#include <boost/utility.hpp>
+
+enum gc_ct_state {
+  CT_NOT_WAITING,	// initial state (set by the gc_client_thread_info constructor)
+  CT_WAIT_ALL,		// presumably: blocked until all awaited jobs finish -- confirm
+  CT_WAIT_ANY,		// presumably: blocked until any awaited job finishes -- confirm
+};
+
+/*!
+ * \brief per client-thread data used by gc_job_manager
+ *
+ * "Client threads" are any threads that invoke methods on
+ * gc_job_manager.  We use pthread_setspecific to store a pointer to
+ * one of these for each thread that comes our way.
+ */
+class gc_client_thread_info : boost::noncopyable {
+public:
+  gc_client_thread_info() :
+    d_free(1), d_client_id(0), d_cond(&d_mutex), d_state(CT_NOT_WAITING),
+    d_jobs_done(0), d_njobs_waiting_for(0),
+    d_jobs_waiting_for(0){ }	// d_mutex is declared before d_cond, so &d_mutex is valid here
+
+  ~gc_client_thread_info() {
+    d_free = 1;
+    d_state = CT_NOT_WAITING;
+    d_jobs_done = 0;
+    d_njobs_waiting_for = 0;
+    d_jobs_waiting_for = 0;
+  }
+
+  //! is this cti free? (1->free, 0->in use)
+  uint32_t	  d_free;
+
+  //! which client info are we? (0 until the owner assigns an id)
+  uint16_t	  d_client_id;
+
+  //! hold this mutex to manipulate anything below here
+  omni_mutex	  d_mutex;
+
+  //! signaled by event handler to wake client thread up
+  omni_condition  d_cond;
+
+  //! Is this client waiting?
+  gc_ct_state	  d_state;
+  
+  //! Jobs that have finished and not yet been waited for (bitvector)
+  unsigned long	 *d_jobs_done;
+
+  //! # of jobs we're waiting for
+  unsigned int    d_njobs_waiting_for;
+
+  //! Jobs that client thread is waiting for
+  gc_job_desc	 **d_jobs_waiting_for;
+
+};
+
+#endif /* INCLUDED_GC_CLIENT_THREAD_INFO_H */
diff --git a/gcell/lib/runtime/gc_jd_queue.c b/gcell/lib/runtime/gc_jd_queue.c
new file mode 100644
index 000000000..aeabd305a
--- /dev/null
+++ b/gcell/lib/runtime/gc_jd_queue.c
@@ -0,0 +1,78 @@
+/* -*- c -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_jd_queue.h>
+#include <gcell/memory_barrier.h>
+#include <mutex_init.h>
+#include <mutex_lock.h>
+#include <mutex_unlock.h>
+
+void 
+gc_jd_queue_init(gc_jd_queue_t *q)
+{
+  _mutex_init(ptr_to_ea(&q->mutex));	// the mutex routines take the result of ptr_to_ea()
+  q->head = 0;				// head == tail == 0 is the empty queue
+  q->tail = 0;
+  smp_wmb();				// write barrier after the initializing stores
+}
+  
+void
+gc_jd_queue_enqueue(gc_jd_queue_t *q, gc_job_desc_t *item)
+{				// appends item at the tail of q while holding q->mutex
+  item->sys.next = 0;		// item becomes the last element, so it has no successor
+  _mutex_lock(ptr_to_ea(&q->mutex));
+  smp_rmb();		// import barrier
+  // head, tail and sys.next hold jdp_to_ea() values; ea_to_jdp() converts back
+  if (q->tail == 0){    // currently empty
+    q->tail = q->head = jdp_to_ea(item);
+  }
+  else {		// not empty, append
+    ea_to_jdp(q->tail)->sys.next = jdp_to_ea(item);
+    q->tail = jdp_to_ea(item);
+  }
+
+  smp_wmb();		// orders stores above before clearing of mutex
+  _mutex_unlock(ptr_to_ea(&q->mutex));
+}
+
+gc_job_desc_t *
+gc_jd_queue_dequeue(gc_jd_queue_t *q)
+{
+  _mutex_lock(ptr_to_ea(&q->mutex));
+  smp_rmb();		// import barrier
+  // Remove and return the item at the head of q, or 0 when q is empty.
+  gc_eaddr_t item_ea = q->head;
+  if (item_ea == 0){	// empty
+    _mutex_unlock(ptr_to_ea(&q->mutex));
+    return 0;
+  }
+
+  q->head = ea_to_jdp(item_ea)->sys.next;
+  if (q->head == 0)	// now empty
+    q->tail = 0;
+
+  gc_job_desc_t *item = ea_to_jdp(item_ea);
+  item->sys.next = 0;	// detach the returned item from the rest of the list
+
+  smp_wmb();		// orders stores above before clearing of mutex
+  _mutex_unlock(ptr_to_ea(&q->mutex));
+  return item;
+}
diff --git a/gcell/lib/runtime/gc_jd_stack.c b/gcell/lib/runtime/gc_jd_stack.c
new file mode 100644
index 000000000..4d865acf0
--- /dev/null
+++ b/gcell/lib/runtime/gc_jd_stack.c
@@ -0,0 +1,168 @@
+/* -*- c -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_jd_stack.h>
+#include <gcell/memory_barrier.h>
+
+/*
+ * begin extract from ppu_intrinsics.h
+ * FIXME handle this a different way
+ */
+
+#if !defined(__PPU__) && !defined(__ppc__) && !defined(__ppc64__) \
+    && !defined(__GNUC__)
+  #error ppu_intrinsics.h included on wrong platform/compiler
+#endif
+
+#define __lwarx(base) __extension__		\
+  ({unsigned int result;	       		\
+    typedef  struct {char a[4];} wordsize;	\
+    wordsize *ptrp = (wordsize*)(base);		\
+  __asm__ volatile ("lwarx %0,%y1"		\
+	   : "=r" (result)			\
+	   : "Z" (*ptrp));			\
+  result; })
+
+#ifdef __powerpc64__
+#define __ldarx(base) __extension__			\
+  ({unsigned long long result;	       			\
+    typedef  struct {char a[8];} doublewordsize;	\
+    doublewordsize *ptrp = (doublewordsize*)(base);	\
+  __asm__ volatile ("ldarx %0,%y1"		       	\
+	   : "=r" (result)				\
+	   : "Z" (*ptrp));				\
+  result; })
+#endif /* __powerpc64__ */
+
+#define __stwcx(base, value) __extension__	\
+  ({unsigned int result;			\
+    typedef  struct {char a[4];} wordsize;	\
+    wordsize *ptrp = (wordsize*)(base);		\
+  __asm__ volatile ("stwcx. %2,%y1\n"		\
+	   "\tmfocrf %0,0x80"			\
+	   : "=r" (result),			\
+	     "=Z" (*ptrp)			\
+	   : "r" (value));			\
+  ((result & 0x20000000) >> 29); })
+
+
+#ifdef __powerpc64__
+#define __stdcx(base, value) __extension__		\
+  ({unsigned long long result;				\
+    typedef  struct {char a[8];} doublewordsize;	\
+    doublewordsize *ptrp = (doublewordsize*)(base);	\
+  __asm__ volatile ("stdcx. %2,%y1\n"			\
+	   "\tmfocrf %0,0x80"				\
+	   : "=r" (result),				\
+	     "=Z" (*ptrp)				\
+	   : "r" (value));				\
+  ((result & 0x20000000) >> 29); })
+#endif /* __powerpc64__ */
+
+
+/*
+ * --- end extract from ppu_intrinsics.h --
+ */
+
+
+void 
+gc_jd_stack_init(gc_jd_stack_t *stack)
+{
+  stack->top = 0;	// top == 0 marks the empty stack (see the pop routines)
+}
+  
+
+#ifdef __powerpc64__  // 64-bit mode
+
+void 
+gc_jd_stack_push(gc_jd_stack_t *stack, gc_job_desc_t *item)
+{
+  gc_eaddr_t	top;
+  gc_eaddr_t	item_ea = ptr_to_ea(item);
+  unsigned int	done;
+  // Push item: retry until the conditional store of the new top succeeds.
+  do {
+    top = __ldarx(&stack->top);		// read the current top via ldarx
+    item->sys.next = top;
+    smp_wmb();	      // order store of item->next before store of stack->top
+    done = __stdcx(&stack->top, item_ea);	// 0 means the store did not happen: loop again
+  } while (unlikely(done == 0));
+}
+
+gc_job_desc_t *
+gc_jd_stack_pop(gc_jd_stack_t *stack)
+{
+  gc_eaddr_t	s;
+  gc_eaddr_t	t;
+  unsigned int	done;
+  // Pop the top item, or return 0 if the stack is empty.
+  do {
+    s  = __ldarx(&stack->top);
+    if (s == 0)			/* stack's empty */
+      return 0;
+    t = ((gc_job_desc_t *) ea_to_ptr(s))->sys.next;	// successor becomes the new top
+    done = __stdcx(&stack->top, t);			// 0 means the store did not happen: loop again
+  } while (unlikely(done == 0));
+
+  return ea_to_ptr(s);
+}
+
+#else  // 32-bit mode
+
+/*
+ * In 32-bit mode, gc_eaddr's will have the top 32-bits zero.
+ * The ldarx/stdcx instructions aren't available in 32-bit mode,
+ * thus we use lwarx/stwcx on the low 32-bits of the 64-bit addresses.
+ * Since we're big-endian, the low 32-bits are at word offset 1.
+ */
+void 
+gc_jd_stack_push(gc_jd_stack_t *stack, gc_job_desc_t *item)
+{
+  gc_eaddr_t	top;
+  unsigned int	done;
+  // 32-bit variant: only the low word of stack->top (word offset 1) is accessed.
+  do {
+    top = __lwarx((int32_t *)(&stack->top) + 1);
+    item->sys.next = top;
+    smp_wmb();	      // order store of item->sys.next before store of stack->top
+    done = __stwcx((int32_t *)(&stack->top) + 1, item);	// 0 means the store did not happen: loop again
+  } while (unlikely(done == 0));
+}
+
+gc_job_desc_t *
+gc_jd_stack_pop(gc_jd_stack_t *stack)
+{
+  gc_eaddr_t	s;
+  gc_eaddr_t	t;
+  unsigned int	done;
+  // 32-bit variant: pop via the low word of stack->top; returns 0 if empty.
+  do {
+    s  = __lwarx((int32_t *)(&stack->top) + 1);
+    if (s == 0)			/* stack's empty */
+      return 0;
+    t = ((gc_job_desc_t *) ea_to_ptr(s))->sys.next;	// successor becomes the new top
+    done = __stwcx((int32_t *)(&stack->top) + 1, (uint32_t) t);	// store only the low 32 bits
+  } while (unlikely(done == 0));
+
+  return ea_to_ptr(s);
+}
+
+#endif
diff --git a/gcell/lib/runtime/gc_job_manager.cc b/gcell/lib/runtime/gc_job_manager.cc
new file mode 100644
index 000000000..d96bc5381
--- /dev/null
+++ b/gcell/lib/runtime/gc_job_manager.cc
@@ -0,0 +1,186 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <gcell/gc_job_manager.h>
+#include "gc_job_manager_impl.h"
+#include <boost/weak_ptr.hpp>
+#include <stdio.h>
+
+
+static boost::weak_ptr<gc_job_manager> s_singleton;
+
+
+// Deleter for job descriptors from alloc_job_desc_sptr: returns them to the manager it holds.
+class job_desc_deleter {
+public:
+  job_desc_deleter(gc_job_manager_sptr mgr) : d_mgr(mgr) {}
+  void operator()(gc_job_desc *jd) {
+    d_mgr->free_job_desc(jd);
+  }
+private:
+  gc_job_manager_sptr	d_mgr;	// manager that receives the descriptor
+};
+
+
+
+gc_job_manager_sptr
+gc_make_job_manager(const gc_jm_options *options)
+{
+  return gc_job_manager_sptr(new gc_job_manager_impl(options));	// factory: the concrete type is always gc_job_manager_impl
+}
+
+gc_job_manager::gc_job_manager(const gc_jm_options *options)
+{
+  // nop: options is not examined here; gc_make_job_manager hands it to gc_job_manager_impl
+}
+
+gc_job_manager::~gc_job_manager()
+{
+  // nop: this class acquires nothing in its constructor
+}
+
+void
+gc_job_manager::set_debug(int debug)
+{
+  // nop: the requested level is discarded in this implementation
+}
+
+int
+gc_job_manager::debug()
+{
+  return 0;	// always 0 here, regardless of earlier set_debug calls
+}
+
+void 
+gc_job_manager::set_singleton(gc_job_manager_sptr mgr)
+{
+  s_singleton = mgr;	// s_singleton is a weak_ptr, so this does not keep mgr alive
+}
+
+gc_job_manager_sptr 
+gc_job_manager::singleton()
+{
+  return gc_job_manager_sptr(s_singleton);	// NOTE(review): if gc_job_manager_sptr is a boost::shared_ptr, this throws boost::bad_weak_ptr when no live singleton exists -- confirm
+}
+
+gc_job_desc_sptr 
+gc_job_manager::make_jd_sptr(gc_job_manager_sptr mgr, gc_job_desc *jd)
+{
+  return gc_job_desc_sptr(jd, job_desc_deleter(mgr));	// the deleter gives jd back via mgr->free_job_desc
+}
+
+gc_job_desc_sptr 
+gc_job_manager::alloc_job_desc(gc_job_manager_sptr mgr)
+{
+  return make_jd_sptr(mgr, mgr->alloc_job_desc());	// wraps mgr's raw descriptor in a self-freeing sptr
+}
+
+
+// ------------------------------------------------------------------------
+
+
+// Deleter for program handles: closes the image with spe_image_close().
+// A null handle is ignored; a failing close is reported via perror only.
+class spe_program_handle_deleter {
+public:
+  void operator()(spe_program_handle_t *program) {
+    if (!program)
+      return;
+    const int rc = spe_image_close(program);
+    if (rc != 0)
+      perror("spe_image_close");
+  }
+};
+
+// nop custom deleter: used for handles that are passed in by address and are not closed here
+class nop_spe_program_handle_deleter {
+public:
+  void operator()(spe_program_handle_t *program) {
+  }
+};
+
+spe_program_handle_sptr 
+gc_program_handle_from_filename(const std::string &filename)
+{
+  return spe_program_handle_sptr(spe_image_open(filename.c_str()),	// result is not checked here
+				 spe_program_handle_deleter());		// closes the image on release
+}
+
+
+spe_program_handle_sptr 
+gc_program_handle_from_address(spe_program_handle_t *handle)
+{
+  return spe_program_handle_sptr(handle, nop_spe_program_handle_deleter());	// non-owning: handle is never closed by the sptr
+}
+
+const std::string
+gc_job_status_string(gc_job_status_t status)
+{				// maps a status code to its enumerator name
+  switch(status){
+  case JS_OK:			return "JS_OK";
+  case JS_SHUTTING_DOWN:	return "JS_SHUTTING_DOWN";
+  case JS_TOO_MANY_CLIENTS:	return "JS_TOO_MANY_CLIENTS";
+  case JS_UNKNOWN_PROC:		return "JS_UNKNOWN_PROC";
+  case JS_BAD_DIRECTION:	return "JS_BAD_DIRECTION";
+  case JS_BAD_EAH:		return "JS_BAD_EAH";
+  case JS_BAD_N_DIRECT:		return "JS_BAD_N_DIRECT";
+  case JS_BAD_N_EA:		return "JS_BAD_N_EA";
+  case JS_ARGS_TOO_LONG:	return "JS_ARGS_TOO_LONG";
+  case JS_BAD_JUJU:		return "JS_BAD_JUJU";
+  case JS_BAD_JOB_DESC:		return "JS_BAD_JOB_DESC";
+  default:			// no trailing newline: gc_bad_submit embeds this in its message
+    char buf[100];
+    snprintf(buf, sizeof(buf), "unknown gc_job_status_t (%d)", (int) status);
+    return buf;
+  }
+}
+
+/*
+ * exception classes: each subclass prefixes its own name to the message
+ */
+
+gc_exception::gc_exception(const std::string &msg)
+  : runtime_error(msg)
+{
+}
+
+gc_unknown_proc::gc_unknown_proc(const std::string &msg)
+  : gc_exception("gc_unknown_proc: " + msg)
+{
+}
+
+gc_bad_alloc::gc_bad_alloc(const std::string &msg)
+  : gc_exception("gc_bad_alloc: " + msg)
+{
+}
+
+gc_bad_align::gc_bad_align(const std::string &msg)
+  : gc_exception("gc_bad_align: " + msg)
+{
+}
+
+gc_bad_submit::gc_bad_submit(const std::string &name, gc_job_status_t status)
+  : gc_exception("gc_bad_submit(" + name + "): " + gc_job_status_string(status))
+{
+}
diff --git a/gcell/lib/runtime/gc_job_manager_impl.cc b/gcell/lib/runtime/gc_job_manager_impl.cc
new file mode 100644
index 000000000..629019f4d
--- /dev/null
+++ b/gcell/lib/runtime/gc_job_manager_impl.cc
@@ -0,0 +1,1249 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "gc_job_manager_impl.h"
+#include <gcell/gc_mbox.h>
+#include <gcell/gc_aligned_alloc.h>
+#include <gcell/memory_barrier.h>
+#include <gc_proc_def_utils.h>
+#include <atomic_dec_if_positive.h>
+#include <stdio.h>
+#include <stdexcept>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+
+// Alignment requested for the shared structures allocated in the
+// constructor (job queue, completion info, free list, job descriptors,
+// bitvectors).
+static const size_t CACHE_LINE_SIZE = 128;
+
+// Values substituted when the corresponding gc_jm_options field is zero.
+static const unsigned int DEFAULT_MAX_JOBS = 128;
+static const unsigned int DEFAULT_MAX_CLIENT_THREADS = 64;
+
+// FIXME this really depends on the SPU code...
+// NOTE(review): not referenced in the visible part of this file.
+static const unsigned int MAX_TOTAL_INDIRECT_LENGTH = 16 * 1024;
+
+
+// Process-wide pthread key under which each client thread stores its
+// gc_client_thread_info pointer.  The key is created by the first
+// gc_job_manager_impl constructed; s_key_initialized records that.
+static bool          s_key_initialized = false;
+static pthread_key_t s_client_key;
+
+// Debug flag consulted by start_worker, which is a static function and
+// only receives a worker_ctx.  Written by set_debug().
+static int s_worker_debug = 0;
+
+// Deleter functor for use with boost::shared_ptr: destroys a non-null
+// gang context and reports (via perror) a failure to do so.
+class gang_deleter {
+public:
+  void operator()(spe_gang_context_ptr_t ctx) {
+    if (!ctx)
+      return;
+
+    if (spe_gang_context_destroy(ctx) != 0)
+      perror("spe_gang_context_destroy");
+  }
+};
+
+
+// Deleter functor for use with boost::shared_ptr: releases the pointer
+// with free().
+class free_deleter {
+public:
+  void operator()(void *mem) { free(mem); }
+};
+
+
+/*
+ * Destructor registered with pthread_key_create for s_client_key.
+ * p is the exiting thread's gc_client_thread_info; flag it as free so
+ * that alloc_cti can hand it to another thread.
+ */
+static void
+client_key_destructor(void *p)
+{
+  gc_client_thread_info *cti = static_cast<gc_client_thread_info *>(p);
+  cti->d_free = 1;
+}
+
+// True iff exactly one bit of x is set (zero is not a power of 2).
+static bool
+is_power_of_2(uint32_t x)
+{
+  if (x == 0)
+    return false;
+
+  return (x & (x - 1)) == 0;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+
+/*
+ * Build a job manager: validate the options, create an SPE context per
+ * worker and load the SPE program into it, allocate the data structures
+ * shared with the SPEs, and finally start the event handler and worker
+ * threads.
+ *
+ * options may be 0, in which case d_options keeps its default-constructed
+ * values.  Zero-valued max_jobs, max_client_threads and nspes are replaced
+ * by real defaults.
+ *
+ * Throws std::runtime_error if program_handle is unset, if the number of
+ * usable SPEs can't be determined, or if any of the setup steps below
+ * fails; throws std::out_of_range if gang scheduling is requested with
+ * more SPEs than are usable.
+ */
+gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
+  : d_debug(0), d_spu_args(0),
+    d_eh_cond(&d_eh_mutex), d_eh_thread(0), d_eh_state(EHS_INIT),
+    d_shutdown_requested(false),
+    d_client_thread(0), d_ea_args_maxsize(0),
+    d_proc_def(0), d_proc_def_ls_addr(0), d_nproc_defs(0)
+{
+  if (!s_key_initialized){
+    int r = pthread_key_create(&s_client_key, client_key_destructor);
+    if (r != 0)
+      throw std::runtime_error("pthread_key_create");
+    s_key_initialized = true;
+  }
+
+  // ensure it's zero
+  pthread_setspecific(s_client_key, 0);
+
+  if (options != 0)
+    d_options = *options;
+
+  // provide the real default for those indicated with a zero
+  if (d_options.max_jobs == 0)
+    d_options.max_jobs = DEFAULT_MAX_JOBS;
+  if (d_options.max_client_threads == 0)
+    d_options.max_client_threads = DEFAULT_MAX_CLIENT_THREADS;
+
+  if (!d_options.program_handle){
+    fprintf(stderr, "gc_job_manager: options->program_handle must be non-zero\n");
+    throw std::runtime_error("gc_job_manager: options->program_handle must be non-zero");
+  }
+
+  int ncpu_nodes = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
+  int nusable_spes = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
+
+  // A negative count means the query failed.  It must not reach the
+  // unsigned d_options.nspes below, where it would turn into a huge
+  // worker count and index past the end of d_worker[].
+  if (nusable_spes < 0){
+    perror("spe_cpu_info_get");
+    throw std::runtime_error("spe_cpu_info_get");
+  }
+
+  if (debug()){
+    printf("cpu_nodes = %d\n", ncpu_nodes);
+    for (int i = 0; i < ncpu_nodes; i++){
+      printf("node[%d].physical_spes = %2d\n", i,
+             spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, i));
+      printf("node[%d].usable_spes   = %2d\n", i,
+             spe_cpu_info_get(SPE_COUNT_USABLE_SPES, i));
+    }
+  }
+
+  // clamp nspes
+  d_options.nspes = std::min(d_options.nspes, (unsigned int) MAX_SPES);
+  nusable_spes = std::min(nusable_spes, (int) MAX_SPES);
+
+  //
+  // sanity check requested number of spes.
+  //
+  if (d_options.nspes == 0)     // use all of them
+    d_options.nspes = nusable_spes;
+  else {
+    if (d_options.nspes > (unsigned int) nusable_spes){
+      fprintf(stderr,
+              "gc_job_manager: warning: caller requested %d spes.  There are only %d available.\n",
+              d_options.nspes, nusable_spes);
+      if (d_options.gang_schedule){
+        // If we're gang scheduling we'll never get scheduled if we
+        // ask for more than are available.
+        throw std::out_of_range("gang_scheduling: not enough spes available");
+      }
+      else {    // FIXME clamp to usable.  problem on PS3 when overcommited
+        fprintf(stderr, "gc_job_manager: clamping nspes to %d\n", nusable_spes);
+        d_options.nspes = nusable_spes;
+      }
+    }
+  }
+
+  if (d_options.use_affinity){
+    printf("gc_job_manager: warning: affinity request was ignored\n");
+  }
+
+  if (d_options.gang_schedule){
+    d_gang = spe_gang_context_sptr(spe_gang_context_create(0), gang_deleter());
+    if (!d_gang){
+      perror("gc_job_manager_impl[spe_gang_context_create]");
+      throw std::runtime_error("spe_gang_context_create");
+    }
+  }
+
+  // ----------------------------------------------------------------
+  // initialize the job queue
+
+  d_queue = (gc_jd_queue_t *) gc_aligned_alloc(sizeof(gc_jd_queue_t), CACHE_LINE_SIZE);
+  _d_queue_boost =
+    boost::shared_ptr<void>((void *) d_queue, free_deleter());
+  gc_jd_queue_init(d_queue);
+
+
+  // ----------------------------------------------------------------
+  // create the spe contexts
+
+  // 1 spu_arg struct for each SPE
+  assert(sizeof(gc_spu_args_t) % 16 == 0);
+  d_spu_args =
+    (gc_spu_args_t *) gc_aligned_alloc(MAX_SPES * sizeof(gc_spu_args_t), 16);
+  _d_spu_args_boost =
+    boost::shared_ptr<void>((void *) d_spu_args, free_deleter());
+
+  // 2 completion info structs for each SPE (we double buffer them)
+  assert(sizeof(gc_comp_info_t) % CACHE_LINE_SIZE == 0);
+  d_comp_info =
+    (gc_comp_info_t *) gc_aligned_alloc(2 * MAX_SPES * sizeof(gc_comp_info_t),
+                                        CACHE_LINE_SIZE);
+  _d_comp_info_boost =
+    boost::shared_ptr<void>((void *) d_comp_info, free_deleter());
+
+
+  // get a handle to the spe program
+
+  spe_program_handle_t *spe_image = d_options.program_handle.get();
+
+  // fish proc_def table out of SPE ELF file
+
+  if (!gcpd_find_table(spe_image, &d_proc_def, &d_nproc_defs, &d_proc_def_ls_addr)){
+    fprintf(stderr, "gc_job_manager_impl: couldn't find gc_proc_defs in SPE ELF file.\n");
+    throw std::runtime_error("no gc_proc_defs");
+  }
+  // fprintf(stderr, "d_proc_def_ls_addr = 0x%0x\n", d_proc_def_ls_addr);
+
+  int spe_flags = (SPE_EVENTS_ENABLE
+                   | SPE_CFG_SIGNOTIFY1_OR
+                   | SPE_CFG_SIGNOTIFY2_OR);
+
+  for (unsigned int i = 0; i < d_options.nspes; i++){
+    // FIXME affinity stuff goes here
+    d_worker[i].spe_ctx = spe_context_create(spe_flags, d_gang.get());
+    if (d_worker[i].spe_ctx == 0){
+      perror("spe_context_create");
+      throw std::runtime_error("spe_context_create");
+    }
+    d_worker[i].spe_idx = i;
+    d_worker[i].spu_args = &d_spu_args[i];
+    d_worker[i].spu_args->queue = ptr_to_ea(d_queue);
+    d_worker[i].spu_args->comp_info[0] = ptr_to_ea(&d_comp_info[2*i+0]);
+    d_worker[i].spu_args->comp_info[1] = ptr_to_ea(&d_comp_info[2*i+1]);
+    d_worker[i].spu_args->spu_idx = i;
+    d_worker[i].spu_args->nspus = d_options.nspes;
+    d_worker[i].spu_args->proc_def_ls_addr = d_proc_def_ls_addr;
+    d_worker[i].spu_args->nproc_defs = d_nproc_defs;
+    d_worker[i].spu_args->log.base = 0;
+    d_worker[i].spu_args->log.nentries = 0;
+    d_worker[i].state = WS_INIT;
+
+    int r = spe_program_load(d_worker[i].spe_ctx, spe_image);
+    if (r != 0){
+      perror("spe_program_load");
+      throw std::runtime_error("spe_program_load");
+    }
+  }
+
+  setup_logfiles();
+
+  // ----------------------------------------------------------------
+  // initialize the free list of job descriptors
+
+  d_free_list = (gc_jd_stack_t *) gc_aligned_alloc(sizeof(gc_jd_stack_t), CACHE_LINE_SIZE);
+  // This ensures that the memory associated with d_free_list is
+  // automatically freed in the destructor or if an exception occurs
+  // here in the constructor.
+  _d_free_list_boost =
+    boost::shared_ptr<void>((void *) d_free_list, free_deleter());
+  gc_jd_stack_init(d_free_list);
+
+  if (debug()){
+    // sizeof yields a size_t; convert explicitly so the format matches
+    printf("sizeof(d_jd[0]) = %lu (0x%lx)\n",
+           (unsigned long) sizeof(d_jd[0]), (unsigned long) sizeof(d_jd[0]));
+    printf("max_jobs = %u\n", d_options.max_jobs);
+  }
+
+  // Initialize the array of job descriptors.
+  d_jd = (gc_job_desc_t *) gc_aligned_alloc(sizeof(d_jd[0]) * d_options.max_jobs, CACHE_LINE_SIZE);
+  _d_jd_boost = boost::shared_ptr<void>((void *) d_jd, free_deleter());
+
+
+  // set unique job_id
+  for (int i = 0; i < (int) d_options.max_jobs; i++)
+    d_jd[i].sys.job_id = i;
+
+  // push them onto the free list
+  for (int i = d_options.max_jobs - 1; i >= 0; i--)
+    free_job_desc(&d_jd[i]);
+
+  // ----------------------------------------------------------------
+  // initialize d_client_thread
+
+  {
+    gc_client_thread_info_sa cti(
+         new gc_client_thread_info[d_options.max_client_threads]);
+
+    d_client_thread.swap(cti);
+
+    for (unsigned int i = 0; i < d_options.max_client_threads; i++)
+      d_client_thread[i].d_client_id = i;
+  }
+
+  // ----------------------------------------------------------------
+  // initialize bitvectors
+
+  // initialize d_bvlen, the number of longs in job related bitvectors.
+  int bits_per_long = sizeof(unsigned long) * 8;
+  d_bvlen = (d_options.max_jobs + bits_per_long - 1) / bits_per_long;
+
+  // allocate all bitvectors in a single cache-aligned chunk
+  size_t nlongs = d_bvlen * d_options.max_client_threads;
+  void *p = gc_aligned_alloc(nlongs * sizeof(unsigned long), CACHE_LINE_SIZE);
+  _d_all_bitvectors = boost::shared_ptr<void>(p, free_deleter());
+
+  // Now point the gc_client_thread_info bitvectors into this storage
+  unsigned long *v = (unsigned long *) p;
+
+  for (unsigned int i = 0; i < d_options.max_client_threads; i++, v += d_bvlen)
+    d_client_thread[i].d_jobs_done = v;
+
+
+  // ----------------------------------------------------------------
+  // create the spe event handler & worker (SPE) threads
+
+  create_event_handler();
+
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * Shut the manager down and drop everything that isn't released by a
+ * smart pointer member.
+ */
+gc_job_manager_impl::~gc_job_manager_impl()
+{
+  shutdown();
+
+  // These raw pointers are only aliases; the storage is released through
+  // _d_queue_boost, _d_free_list_boost and _d_jd_boost respectively.
+  d_queue = 0;
+  d_free_list = 0;
+  d_jd = 0;
+
+  // The calling thread's key may still hold a pointer to one of our
+  // client thread info entries; reset it.
+  pthread_setspecific(s_client_key, 0);
+
+  unmap_logfiles();
+}
+
+/*
+ * Request shutdown and wait for the event handler thread to report
+ * EHS_DEAD.  Returns false (without waiting) if the event handler was
+ * never started, true otherwise.
+ */
+bool
+gc_job_manager_impl::shutdown()
+{
+  omni_mutex_lock guard(d_eh_mutex);
+
+  // flag polled by the event handler thread
+  d_shutdown_requested = true;
+
+  // should only happen during early QA code
+  bool never_started = (d_eh_thread == 0) && (d_eh_state == EHS_INIT);
+  if (never_started)
+    return false;
+
+  // block until the event handler has finished
+  for (;;){
+    if (d_eh_state == EHS_DEAD)
+      break;
+    d_eh_cond.wait();
+  }
+
+  return true;
+}
+
+// Report d_options.nspes, i.e. the SPE count after the constructor's
+// defaulting and clamping.
+int
+gc_job_manager_impl::nspes() const
+{
+  const int n = d_options.nspes;
+  return n;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+// Clear every bit of a job bitvector (d_bvlen unsigned longs).
+void
+gc_job_manager_impl::bv_zero(unsigned long *bv)
+{
+  size_t nbytes = sizeof(unsigned long) * d_bvlen;
+  memset(bv, 0, nbytes);
+}
+
+// Clear bit number bitno of bv.
+inline void
+gc_job_manager_impl::bv_clr(unsigned long *bv, unsigned int bitno)
+{
+  const unsigned int bits_per_word = sizeof (unsigned long) * 8;
+  const unsigned long mask = 1UL << (bitno % bits_per_word);
+  bv[bitno / bits_per_word] &= ~mask;
+}
+
+// Set bit number bitno of bv.
+inline void
+gc_job_manager_impl::bv_set(unsigned long *bv, unsigned int bitno)
+{
+  const unsigned int bits_per_word = sizeof (unsigned long) * 8;
+  const unsigned long mask = 1UL << (bitno % bits_per_word);
+  bv[bitno / bits_per_word] |= mask;
+}
+
+// True iff bit number bitno of bv is set.
+inline bool
+gc_job_manager_impl::bv_isset(unsigned long *bv, unsigned int bitno)
+{
+  const unsigned int bits_per_word = sizeof (unsigned long) * 8;
+  const unsigned long mask = 1UL << (bitno % bits_per_word);
+  return (bv[bitno / bits_per_word] & mask) != 0;
+}
+
+// True iff bit number bitno of bv is clear.
+inline bool
+gc_job_manager_impl::bv_isclr(unsigned long *bv, unsigned int bitno)
+{
+  const unsigned int bits_per_word = sizeof (unsigned long) * 8;
+  const unsigned long mask = 1UL << (bitno % bits_per_word);
+  return (bv[bitno / bits_per_word] & mask) == 0;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * Take a job descriptor off the free list.
+ * Throws gc_bad_alloc when the free list is empty.
+ */
+gc_job_desc *
+gc_job_manager_impl::alloc_job_desc()
+{
+  // stack is lock free, and safe to call from any thread
+  gc_job_desc *jd = gc_jd_stack_pop(d_free_list);
+  if (jd != 0)
+    return jd;
+
+  throw gc_bad_alloc("alloc_job_desc: none available");
+}
+
+// Return a job descriptor to the free list; a null jd is ignored.
+void
+gc_job_manager_impl::free_job_desc(gc_job_desc *jd)
+{
+  if (jd == 0)
+    return;
+
+  // stack is lock free, thus safe to call from any thread
+  gc_jd_stack_push(d_free_list, jd);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * We check as much as we can here on the PPE side, so that the SPE
+ * doesn't have to.
+ *
+ * Returns true if args holds no more than MAX_ARGS_DIRECT arguments;
+ * otherwise sets jd->status to JS_BAD_N_DIRECT and returns false.
+ */
+static bool
+check_direct_args(gc_job_desc *jd, gc_job_direct_args *args)
+{
+  bool ok = !(args->nargs > MAX_ARGS_DIRECT);
+  if (!ok)
+    jd->status = JS_BAD_N_DIRECT;
+
+  return ok;
+}
+
+/*
+ * Validate the EA (effective address) arguments of a job.
+ *
+ * On failure jd->status is set to JS_BAD_N_EA (too many args),
+ * JS_BAD_DIRECTION (an arg is neither GCJD_DMA_GET nor GCJD_DMA_PUT) or
+ * JS_BAD_EAH (the args don't all share the same upper 32 address bits)
+ * and false is returned.  On success the OR of all directions is stored
+ * in jd->sys.direction_union and true is returned.
+ */
+static bool
+check_ea_args(gc_job_desc *jd, gc_job_ea_args *p)
+{
+  if (p->nargs > MAX_ARGS_EA){
+    jd->status = JS_BAD_N_EA;
+    return false;
+  }
+
+  // pass 1: every direction must be a plain get or put
+  uint32_t directions_seen = 0;
+
+  for (unsigned int k = 0; k < p->nargs; k++){
+    directions_seen |= p->arg[k].direction;
+
+    bool known = (p->arg[k].direction == GCJD_DMA_GET
+                  || p->arg[k].direction == GCJD_DMA_PUT);
+    if (!known){
+      jd->status = JS_BAD_DIRECTION;
+      return false;
+    }
+  }
+
+  // pass 2: all args must agree with arg[0] on the high half of the address
+  for (unsigned int k = 1; k < p->nargs; k++){
+    unsigned int first_eah = (p->arg[0].ea_addr) >> 32;
+    if ((p->arg[k].ea_addr >> 32) != first_eah){
+      jd->status = JS_BAD_EAH;
+      return false;
+    }
+  }
+
+  jd->sys.direction_union = directions_seen;
+  return true;
+}
+
+/*
+ * Validate a job descriptor and enqueue it for the SPEs.
+ *
+ * jd must be one of this manager's descriptors (see alloc_job_desc).
+ * The calling thread is assigned a gc_client_thread_info on its first
+ * submission; that association is kept in thread-specific storage.
+ *
+ * Returns true if the job was enqueued.  Returns false, with the reason
+ * in jd->status, if the manager is shutting down, jd isn't ours, no
+ * client slot is available, the proc id is unknown, or the arguments
+ * fail validation.
+ */
+bool
+gc_job_manager_impl::submit_job(gc_job_desc *jd)
+{
+  if (unlikely(d_shutdown_requested)){
+    jd->status = JS_SHUTTING_DOWN;
+    return false;
+  }
+
+  // Ensure it's one of our job descriptors
+
+  if (jd < d_jd || jd >= &d_jd[d_options.max_jobs]){
+    jd->status = JS_BAD_JOB_DESC;
+    return false;
+  }
+
+  // Ensure we've got a client_thread_info assigned to this thread.
+
+  gc_client_thread_info *cti =
+    (gc_client_thread_info *) pthread_getspecific(s_client_key);
+  if (unlikely(cti == 0)){
+    if ((cti = alloc_cti()) == 0){
+      fprintf(stderr, "gc_job_manager_impl::submit_job: Too many client threads.\n");
+      jd->status = JS_TOO_MANY_CLIENTS;
+      return false;
+    }
+    int r = pthread_setspecific(s_client_key, cti);
+    if (r != 0){
+      // The slot was never attached to this thread, so the key destructor
+      // will not release it for us; hand it back now or it's lost for good.
+      free_cti(cti);
+      jd->status = JS_BAD_JUJU;
+      fprintf(stderr, "pthread_setspecific failed (return = %d)\n", r);
+      return false;
+    }
+  }
+
+  if (jd->proc_id == GCP_UNKNOWN_PROC){
+    jd->status = JS_UNKNOWN_PROC;
+    return false;
+  }
+
+  if (!check_direct_args(jd, &jd->input))
+    return false;
+
+  if (!check_direct_args(jd, &jd->output))
+    return false;
+
+  if (!check_ea_args(jd, &jd->eaa))
+    return false;
+
+  jd->status = JS_OK;
+  jd->sys.client_id = cti->d_client_id;
+
+  // FIXME keep count of jobs in progress?
+
+  gc_jd_queue_enqueue(d_queue, jd);
+  return true;
+}
+
+// Wait for a single job.  True only if the wait itself succeeded and the
+// job finished with status JS_OK.
+bool
+gc_job_manager_impl::wait_job(gc_job_desc *jd)
+{
+  bool done;
+  gc_job_desc *jobs[1] = { jd };
+
+  if (wait_jobs(1, jobs, &done, GC_WAIT_ANY) != 1)
+    return false;
+
+  return jd->status == JS_OK;
+}
+
+/*
+ * Block until any (GC_WAIT_ANY) or all (GC_WAIT_ALL) of the given jobs
+ * have completed.
+ *
+ * njobs: number of entries in jd[] and done[]
+ * jd:    jobs to wait for; each must have been submitted by this thread
+ * done:  output; done[i] is set to true for each completed jd[i]
+ * mode:  GC_WAIT_ANY or GC_WAIT_ALL
+ *
+ * Returns the number of jobs marked done, or -1 if the calling thread has
+ * never submitted a job or one of the jobs belongs to another client.
+ * With njobs == 0 it returns 0 immediately.
+ */
+int
+gc_job_manager_impl::wait_jobs(unsigned int njobs,
+                               gc_job_desc *jd[],
+                               bool done[],
+                               gc_wait_mode mode)
+{
+  unsigned int i;
+
+  gc_client_thread_info *cti =
+    (gc_client_thread_info *) pthread_getspecific(s_client_key);
+  if (unlikely(cti == 0))
+    return -1;
+
+  // Nothing to wait for.  Without this, GC_WAIT_ANY with no jobs would
+  // block forever because no completion could ever satisfy it.
+  if (njobs == 0)
+    return 0;
+
+  for (i = 0; i < njobs; i++){
+    done[i] = false;
+    if (unlikely(jd[i]->sys.client_id != cti->d_client_id)){
+      fprintf(stderr, "gc_job_manager_impl::wait_jobs: can't wait for a job you didn't submit\n");
+      return -1;
+    }
+  }
+
+  {
+    omni_mutex_lock     l(cti->d_mutex);
+
+    // setup info for event handler
+    cti->d_state = (mode == GC_WAIT_ANY) ? CT_WAIT_ANY : CT_WAIT_ALL;
+    cti->d_njobs_waiting_for = njobs;
+    cti->d_jobs_waiting_for = jd;
+    assert(cti->d_jobs_done != 0);
+
+    unsigned int ndone = 0;
+
+    // wait for jobs to complete
+
+    while (1){
+      // Recount from scratch on every pass: jobs already flagged in done[]
+      // count as done, newly completed ones are moved from the bitvector
+      // into done[].
+      ndone = 0;
+      for (i= 0; i < njobs; i++){
+        if (done[i])
+          ndone++;
+        else if (bv_isset(cti->d_jobs_done, jd[i]->sys.job_id)){
+          bv_clr(cti->d_jobs_done, jd[i]->sys.job_id);
+          done[i] = true;
+          ndone++;
+        }
+      }
+
+      if (mode == GC_WAIT_ANY && ndone > 0)
+        break;
+
+      if (mode == GC_WAIT_ALL && ndone == njobs)
+        break;
+
+      // FIXME what happens when somebody calls shutdown?
+
+      cti->d_cond.wait();       // wait for event handler to wake us up
+    }
+
+    cti->d_state = CT_NOT_WAITING;
+    cti->d_njobs_waiting_for = 0;       // tidy up (not reqd)
+    cti->d_jobs_waiting_for = 0;        // tidy up (not reqd)
+    return ndone;
+  }
+}
+
+////////////////////////////////////////////////////////////////////////
+
+// Send msg to every SPE in use.  All SPEs are tried even after a
+// failure; the result is true only if every send succeeded.
+bool
+gc_job_manager_impl::send_all_spes(uint32_t msg)
+{
+  bool all_ok = true;
+
+  for (unsigned int spe = 0; spe < d_options.nspes; spe++){
+    if (!send_spe(spe, msg))
+      all_ok = false;
+  }
+
+  return all_ok;
+}
+
+// Write one 32-bit message to the inbound mailbox of the given SPE.
+// Returns false for an out of range spe, or if the write fails or
+// doesn't report exactly one message written.
+bool
+gc_job_manager_impl::send_spe(unsigned int spe, uint32_t msg)
+{
+  if (spe >= d_options.nspes)
+    return false;
+
+  int nwritten = spe_in_mbox_write(d_worker[spe].spe_ctx, &msg, 1,
+                                   SPE_MBOX_ALL_BLOCKING);
+  if (nwritten >= 0)
+    return nwritten == 1;
+
+  perror("spe_in_mbox_write");
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * Report a pthread_create failure on stderr.
+ *
+ * r:     the error number returned by pthread_create
+ * which: label identifying the thread that couldn't be created
+ */
+static void
+pthread_create_failure_msg(int r, const char *which)
+{
+  char buf[256];
+  // const: this points at string literals in the common cases
+  const char *s = 0;
+
+  switch (r){
+  case EAGAIN: s = "EAGAIN"; break;
+  case EINVAL: s = "EINVAL"; break;
+  case EPERM:  s = "EPERM";  break;
+  default:
+    snprintf(buf, sizeof(buf), "Unknown error %d", r);
+    s = buf;
+    break;
+  }
+  fprintf(stderr, "pthread_create[%s] failed: %s\n", which, s);
+}
+
+
+/*
+ * Create a detached thread running start_routine(arg).
+ *
+ * thread: receives the id of the new thread
+ * msg:    label used in the diagnostic if creation fails
+ *
+ * Returns true on success; on failure a message is written to stderr
+ * and false is returned.
+ */
+static bool
+start_thread(pthread_t *thread,
+             void *(*start_routine)(void *),  void *arg,
+             const char *msg)
+{
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+  // FIXME save sigprocmask
+  // FIXME set sigprocmask
+
+  int r = pthread_create(thread, &attr, start_routine, arg);
+
+  // FIXME restore sigprocmask
+
+  // The attribute object is only needed for the pthread_create call;
+  // release it on both the success and the failure path.
+  pthread_attr_destroy(&attr);
+
+  if (r != 0){
+    pthread_create_failure_msg(r, msg);
+    return false;
+  }
+  return true;
+}
+
+
+////////////////////////////////////////////////////////////////////////
+
+static void *start_worker(void *arg);
+
+// Thread entry point for the event handler: arg is the owning
+// gc_job_manager_impl, whose event_handler_loop is run to completion.
+static void *
+start_event_handler(void *arg)
+{
+  gc_job_manager_impl *jm = static_cast<gc_job_manager_impl *>(arg);
+  jm->event_handler_loop();
+  return 0;
+}
+
+/*
+ * Create the SPE event handler, register every worker's SPE context with
+ * it, then start the event handling thread followed by one worker thread
+ * per SPE.  Throws std::runtime_error if any step fails.
+ */
+void
+gc_job_manager_impl::create_event_handler()
+{
+  // create the SPE event handler and register our interest in events
+
+  d_spe_event_handler.ptr = spe_event_handler_create();
+  if (d_spe_event_handler.ptr == 0){
+    perror("spe_event_handler_create");
+    throw std::runtime_error("spe_event_handler_create");
+  }
+
+  for (unsigned int spe = 0; spe < d_options.nspes; spe++){
+    spe_event_unit_t unit;
+    memset(&unit, 0, sizeof(unit));
+    unit.events = SPE_EVENT_OUT_INTR_MBOX | SPE_EVENT_SPE_STOPPED;
+    unit.spe = d_worker[spe].spe_ctx;
+    unit.data.u32 = spe;        // set in events returned by spe_event_wait
+
+    if (spe_event_handler_register(d_spe_event_handler.ptr, &unit) != 0){
+      perror("spe_event_handler_register");
+      throw std::runtime_error("spe_event_handler_register");
+    }
+  }
+
+  // create our event handling thread
+
+  if (!start_thread(&d_eh_thread, start_event_handler, this, "event_handler"))
+    throw std::runtime_error("pthread_create");
+
+  // create the SPE worker threads, stopping at the first failure
+
+  bool all_started = true;
+  for (unsigned int w = 0; w < d_options.nspes; w++){
+    char name[256];
+    snprintf(name, sizeof(name), "worker[%d]", w);
+    if (!start_thread(&d_worker[w].thread, start_worker, &d_worker[w], name)){
+      all_started = false;
+      break;
+    }
+  }
+
+  if (all_started)
+    return;
+
+  //
+  // FIXME Clean up the mess.  Need to terminate event handler and all workers.
+  //
+  // this should cause the workers to exit, unless they're seriously broken
+  send_all_spes(MK_MBOX_MSG(OP_EXIT, 0));
+
+  shutdown();
+
+  throw std::runtime_error("pthread_create");
+}
+
+////////////////////////////////////////////////////////////////////////
+
+// Record the event handler's new state under d_eh_mutex and wake up
+// everybody waiting on d_eh_cond.
+void
+gc_job_manager_impl::set_eh_state(evt_handler_state s)
+{
+  omni_mutex_lock guard(d_eh_mutex);
+
+  d_eh_state = s;
+  d_eh_cond.broadcast();
+}
+
+// Store the buffer size reported by the SPE under d_eh_mutex and wake up
+// everybody waiting on d_eh_cond (see ea_args_maxsize).
+void
+gc_job_manager_impl::set_ea_args_maxsize(int maxsize)
+{
+  omni_mutex_lock guard(d_eh_mutex);
+
+  d_ea_args_maxsize = maxsize;
+  d_eh_cond.broadcast();
+}
+
+// Debugging aid: print the SPE number and event mask of evt on stdout,
+// followed by the name of each event bit that is set.
+void
+gc_job_manager_impl::print_event(spe_event_unit_t *evt)
+{
+  printf("evt: spe = %d events = (0x%x)", evt->data.u32, evt->events);
+
+  if (evt->events & SPE_EVENT_OUT_INTR_MBOX)
+    fputs(" OUT_INTR_MBOX", stdout);
+
+  if (evt->events & SPE_EVENT_IN_MBOX)
+    fputs(" IN_MBOX", stdout);
+
+  if (evt->events & SPE_EVENT_TAG_GROUP)
+    fputs(" TAG_GROUP", stdout);
+
+  if (evt->events & SPE_EVENT_SPE_STOPPED)
+    fputs(" SPE_STOPPED", stdout);
+
+  putchar('\n');
+}
+
+// Pairs a completed job with the client that submitted it.  Used as the
+// element type of the scratch array that notify_clients_jobs_are_done
+// sorts by client.
+struct job_client_info {
+  uint16_t	job_id;
+  uint16_t	client_id;
+};
+
+// qsort comparator: orders job_client_info entries by ascending client_id.
+static int
+compare_jci_clients(const void *va, const void *vb)
+{
+  const job_client_info *lhs = static_cast<const job_client_info *>(va);
+  const job_client_info *rhs = static_cast<const job_client_info *>(vb);
+
+  // both ids are 16 bits wide, so the difference can't overflow an int
+  return (int) lhs->client_id - (int) rhs->client_id;
+}
+
+/*
+ * Called from handle_event when an SPE reports OP_JOBS_DONE.
+ *
+ * spe_num:             index of the reporting SPE
+ * completion_info_idx: selects which of that SPE's two completion info
+ *                      buffers holds the finished job ids (low bit only)
+ *
+ * Every job listed in the buffer is marked in the d_jobs_done bitvector
+ * of the client that submitted it, and waiting clients are signaled.
+ * The buffer's in_use flag is cleared on every exit path so that the SPE
+ * can reuse it.
+ */
+void
+gc_job_manager_impl::notify_clients_jobs_are_done(unsigned int spe_num,
+                                                  unsigned int completion_info_idx)
+{
+  const char *msg = "gc_job_manager_impl::notify_client_job_is_done (INTERNAL ERROR)";
+
+  smp_rmb();  // order reads so we know that data sent from SPE is here
+
+  gc_comp_info_t *ci = &d_comp_info[2 * spe_num + (completion_info_idx & 0x1)];
+
+  // read the count once; everything below works from this snapshot
+  const unsigned int ncomplete = ci->ncomplete;
+
+  if (ncomplete == 0){  // never happens, but ensures code below is correct
+    ci->in_use = 0;
+    return;
+  }
+
+  if (ncomplete > GC_CI_NJOBS){
+    // internal error, shouldn't happen.  gci[] below has room for only
+    // GC_CI_NJOBS entries, so a larger count would overrun the stack.
+    fprintf(stderr, "%s: invalid ncomplete = %u\n", msg, ncomplete);
+    ci->in_use = 0;             // clear flag so SPE knows we're done with it
+    return;
+  }
+
+  if (0){
+    static int total_jobs;
+    static int total_msgs;
+    total_msgs++;
+    total_jobs += ncomplete;
+    printf("ppe:     tj = %6d  tm = %6d\n", total_jobs, total_msgs);
+  }
+
+  job_client_info gci[GC_CI_NJOBS];
+
+  /*
+   * Make one pass through and sanity check everything while filling in gci
+   */
+  for (unsigned int i = 0; i < ncomplete; i++){
+    unsigned int job_id = ci->job_id[i];
+
+    if (job_id >= d_options.max_jobs){
+      // internal error, shouldn't happen
+      fprintf(stderr,"%s: invalid job_id = %d\n", msg, job_id);
+      ci->in_use = 0;           // clear flag so SPE knows we're done with it
+      return;
+    }
+    gc_job_desc *jd = &d_jd[job_id];
+
+    if (jd->sys.client_id >= d_options.max_client_threads){
+      // internal error, shouldn't happen
+      fprintf(stderr, "%s: invalid client_id = %d\n", msg, jd->sys.client_id);
+      ci->in_use = 0;           // clear flag so SPE knows we're done with it
+      return;
+    }
+
+    gci[i].job_id = job_id;
+    gci[i].client_id = jd->sys.client_id;
+  }
+
+  // sort by client_id so we only have to lock & signal once / client
+
+  if (ncomplete > 1)
+    qsort(gci, ncomplete, sizeof(gci[0]), compare_jci_clients);
+
+  // "wind-in": lock the first client and mark its first job
+
+  gc_client_thread_info *last_cti = &d_client_thread[gci[0].client_id];
+  last_cti->d_mutex.lock();
+  bv_set(last_cti->d_jobs_done, gci[0].job_id);  // mark job done
+
+  for (unsigned int i = 1; i < ncomplete; i++){
+
+    gc_client_thread_info *cti = &d_client_thread[gci[i].client_id];
+
+    if (cti != last_cti){       // new client?
+
+      // yes.  signal old client, unlock old, lock new
+
+      // FIXME we could distinguish between CT_WAIT_ALL & CT_WAIT_ANY
+
+      if (last_cti->d_state == CT_WAIT_ANY || last_cti->d_state == CT_WAIT_ALL)
+        last_cti->d_cond.signal();      // wake client thread up
+
+      last_cti->d_mutex.unlock();
+      cti->d_mutex.lock();
+      last_cti = cti;
+    }
+
+    // mark job done (cti's mutex is held at this point)
+    bv_set(cti->d_jobs_done, gci[i].job_id);
+  }
+
+  // "wind-out": signal and unlock the last client
+
+  if (last_cti->d_state == CT_WAIT_ANY || last_cti->d_state == CT_WAIT_ALL)
+    last_cti->d_cond.signal();  // wake client thread up
+  last_cti->d_mutex.unlock();
+
+  ci->in_use = 0;               // clear flag so SPE knows we're done with it
+}
+
+/*
+ * Process one event delivered by spe_event_wait (see event_handler_loop).
+ *
+ * evt->data.u32 is the SPE index that create_event_handler stored when
+ * it registered the SPE.  Two kinds of event are handled:
+ *   - SPE_EVENT_OUT_INTR_MBOX: read the pending mailbox messages and
+ *     dispatch each on its opcode;
+ *   - SPE_EVENT_SPE_STOPPED: read the stop info and report the reason.
+ * Anything else is reported on stderr.
+ */
+void
+gc_job_manager_impl::handle_event(spe_event_unit_t *evt)
+{
+  // print_event(evt);
+
+  int spe_num = evt->data.u32;
+
+  // only a single event type can be signaled at a time
+  
+  if (evt->events == SPE_EVENT_OUT_INTR_MBOX) { // SPE sent us 1 or more msgs
+    static const int NMSGS = 32;
+    unsigned int msg[NMSGS];
+    // n is the number of messages actually read (or negative on error)
+    int n = spe_out_intr_mbox_read(evt->spe, msg, NMSGS, SPE_MBOX_ANY_BLOCKING);
+    // printf("spe_out_intr_mbox_read = %d\n", n);
+    if (n < 0){
+      perror("spe_out_intr_mbox_read");
+    }
+    else {
+      for (int i = 0; i < n; i++){
+	switch(MBOX_MSG_OP(msg[i])){
+	case OP_JOBS_DONE:
+	  // the message argument selects the completion info buffer
+	  if (debug())
+	    printf("eh: job_done (0x%08x) from spu[%d]\n", msg[i], spe_num);
+	  notify_clients_jobs_are_done(spe_num, MBOX_MSG_ARG(msg[i]));
+	  break;
+
+	case OP_SPU_BUFSIZE:
+	  // reply to the OP_GET_SPU_BUFSIZE sent by event_handler_loop
+	  set_ea_args_maxsize(MBOX_MSG_ARG(msg[i]));
+	  break;
+
+	case OP_EXIT:
+	default:
+	  printf("eh: Unexpected msg (0x%08x) from spu[%d]\n", msg[i], spe_num);
+	  break;
+	}
+      }
+    }
+  }
+  else if (evt->events == SPE_EVENT_SPE_STOPPED){ // the SPE stopped
+    spe_stop_info_t si;
+    int r = spe_stop_info_read(evt->spe, &si);
+    if (r < 0){
+      perror("spe_stop_info_read");
+    }
+    else {
+      // A normal exit is only reported when debugging; every other stop
+      // reason is always printed.
+      switch (si.stop_reason){
+      case SPE_EXIT:
+	if (debug()){
+	  printf("eh: spu[%d] SPE_EXIT w/ exit_code = %d\n",
+		 spe_num, si.result.spe_exit_code);
+	}
+	break;
+      case SPE_STOP_AND_SIGNAL:
+	printf("eh: spu[%d] SPE_STOP_AND_SIGNAL w/ spe_signal_code = 0x%x\n",
+	       spe_num, si.result.spe_signal_code);
+	break;
+      case SPE_RUNTIME_ERROR:
+	printf("eh: spu[%d] SPE_RUNTIME_ERROR w/ spe_runtime_error = 0x%x\n",
+	       spe_num, si.result.spe_runtime_error);
+	break;
+      case SPE_RUNTIME_EXCEPTION:
+	printf("eh: spu[%d] SPE_RUNTIME_EXCEPTION w/ spe_runtime_exception = 0x%x\n",
+	       spe_num, si.result.spe_runtime_exception);
+	break;
+      case SPE_RUNTIME_FATAL:
+	printf("eh: spu[%d] SPE_RUNTIME_FATAL w/ spe_runtime_fatal = 0x%x\n",
+	       spe_num, si.result.spe_runtime_fatal);
+	break;
+      case SPE_CALLBACK_ERROR:
+	printf("eh: spu[%d] SPE_CALLBACK_ERROR w/ spe_callback_error = 0x%x\n",
+	       spe_num, si.result.spe_callback_error);
+	break;
+      case SPE_ISOLATION_ERROR:
+	printf("eh: spu[%d] SPE_ISOLATION_ERROR w/ spe_isolation_error = 0x%x\n",
+	       spe_num, si.result.spe_isolation_error);
+	break;
+      default:
+	printf("eh: spu[%d] UNKNOWN STOP REASON (%d) w/ spu_status = 0x%x\n",
+	       spe_num, si.stop_reason, si.spu_status);
+	break;
+      }
+    }
+  }
+#if 0 // not enabled
+  else if (evt->events == SPE_EVENT_IN_MBOX){	 // there's room to write to SPE
+    // spe_in_mbox_write (ignore)
+  }
+  else if (evt->events == SPE_EVENT_TAG_GROUP){	 // our DMA completed
+    // spe_mfcio_tag_status_read
+  }
+#endif
+  else {
+    fprintf(stderr, "handle_event: unexpected evt->events = 0x%x\n", evt->events);
+    return;
+  }
+}
+
+//
+// This is the "main program" of the event handling thread
+//
+// It runs a small state machine (d_eh_state) and, between state checks,
+// waits up to TIMEOUT ms for SPE events and hands each to handle_event.
+// The loop returns once a requested shutdown has progressed to the point
+// where every worker has reached WS_DEAD; the state is then EHS_DEAD,
+// which is what shutdown() waits for.
+//
+void
+gc_job_manager_impl::event_handler_loop()
+{
+  static const int MAX_EVENTS = 16;
+  static const int TIMEOUT = 20;	// how long to block in milliseconds
+
+  spe_event_unit_t events[MAX_EVENTS];
+
+  if (d_debug)
+    printf("event_handler_loop: starting\n");
+
+  set_eh_state(EHS_RUNNING);
+
+  // ask the first spe for its max bufsize
+  // (the answer arrives as OP_SPU_BUFSIZE and is processed by handle_event)
+  send_spe(0, MK_MBOX_MSG(OP_GET_SPU_BUFSIZE, 0));
+
+  while (1){
+    // At most one state transition per iteration; events are polled below
+    // after each one.
+    switch(d_eh_state){
+
+    case EHS_RUNNING:      // normal stuff
+      if (d_shutdown_requested) {
+	set_eh_state(EHS_SHUTTING_DOWN);
+      }
+      break;
+
+    case EHS_SHUTTING_DOWN:
+
+      // FIXME wait until job queue is empty, then tell them to exit
+
+      send_all_spes(MK_MBOX_MSG(OP_EXIT, 0));
+      set_eh_state(EHS_WAITING_FOR_WORKERS_TO_DIE);
+      break;
+
+    case EHS_WAITING_FOR_WORKERS_TO_DIE:
+      {
+	// start_worker sets WS_DEAD just before its thread returns
+	bool all_dead = true;
+	for (unsigned int i = 0; i < d_options.nspes; i++)
+	  all_dead &= d_worker[i].state == WS_DEAD;
+
+	if (all_dead){
+	  set_eh_state(EHS_DEAD);
+	  if (d_debug)
+	    printf("event_handler_loop: exiting\n");
+	  return;
+	}
+      }
+      break;
+
+    default:
+      // any state not handled above: give up
+      set_eh_state(EHS_DEAD);
+      printf("event_handler_loop(default): exiting\n");
+      return;
+    }
+
+    // block waiting for events...
+    int nevents = spe_event_wait(d_spe_event_handler.ptr,
+				 events, MAX_EVENTS, TIMEOUT);
+    if (nevents < 0){
+      perror("spe_wait_event");
+      // FIXME bail?
+    }
+    // a negative nevents simply skips this loop
+    for (int i = 0; i < nevents; i++){
+      handle_event(&events[i]);
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////
+// This is the top of the SPE worker threads
+
+/*
+ * Thread entry point for an SPE worker.  arg is the worker_ctx of the SPE
+ * to run.  The thread runs the SPE program until it stops, reports how it
+ * stopped, and marks the worker WS_DEAD before returning.
+ */
+static void *
+start_worker(void *arg)
+{
+  worker_ctx *w = static_cast<worker_ctx *>(arg);
+
+  w->state = WS_RUNNING;
+  if (s_worker_debug)
+    printf("worker[%d]: WS_RUNNING\n", w->spe_idx);
+
+  spe_stop_info_t stop_info;
+  unsigned int entry = SPE_DEFAULT_ENTRY;
+  int rc = spe_context_run(w->spe_ctx, &entry, 0, w->spu_args, 0, &stop_info);
+
+  if (rc > 0){
+    // called stop_and_signal
+    //
+    // I'm not sure we'll ever get here.  I think the event
+    // handler will catch this...
+    printf("worker[%d]: SPE_STOP_AND_SIGNAL w/ spe_signal_code = 0x%x\n",
+           w->spe_idx, stop_info.result.spe_signal_code);
+  }
+  else if (rc == 0){
+    // spe program called exit.
+    if (s_worker_debug)
+      printf("worker[%d]: SPE_EXIT w/ exit_code = %d\n",
+             w->spe_idx, stop_info.result.spe_exit_code);
+  }
+  else {
+    // error
+    char where[64];
+    snprintf(where, sizeof(where), "worker[%d]: spe_context_run", w->spe_idx);
+    perror(where);
+  }
+
+  // whatever happened, this thread is finished
+  if (s_worker_debug)
+    printf("worker[%d]: WS_DEAD\n", w->spe_idx);
+
+  w->state = WS_DEAD;
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * Claim a free gc_client_thread_info slot and reset it for a new client
+ * thread.  Returns 0 if every slot is taken.
+ */
+gc_client_thread_info *
+gc_job_manager_impl::alloc_cti()
+{
+  for (unsigned int slot = 0; slot < d_options.max_client_threads; slot++){
+    gc_client_thread_info *cti = &d_client_thread[slot];
+
+    if (!cti->d_free)
+      continue;
+
+    // try to atomically grab it; somebody else may have beaten us to it
+    if (_atomic_dec_if_positive(ptr_to_ea(&cti->d_free)) != 0)
+      continue;
+
+    // got it...
+    cti->d_state = CT_NOT_WAITING;
+    bv_zero(cti->d_jobs_done);
+    cti->d_njobs_waiting_for = 0;
+    cti->d_jobs_waiting_for = 0;
+
+    return cti;
+  }
+
+  return 0;
+}
+
+// Mark a client thread info slot as free again.  cti must point into
+// d_client_thread.
+void
+gc_job_manager_impl::free_cti(gc_client_thread_info *cti)
+{
+  size_t slot = (size_t) (cti - d_client_thread.get());
+  assert(slot < d_options.max_client_threads);
+  (void) slot;          // only used by the assert
+
+  cti->d_free = 1;
+}
+
+// Return the value recorded by set_ea_args_maxsize, blocking until it is
+// non-zero.
+int
+gc_job_manager_impl::ea_args_maxsize()
+{
+  omni_mutex_lock guard(d_eh_mutex);
+
+  // wait for it to be initialized
+  for (;;){
+    if (d_ea_args_maxsize != 0)
+      break;
+    d_eh_cond.wait();
+  }
+
+  return d_ea_args_maxsize;
+}
+
+// Set the debug level for this manager and for the worker threads (the
+// latter through the file-scope s_worker_debug).
+void
+gc_job_manager_impl::set_debug(int debug)
+{
+  s_worker_debug = debug;
+  d_debug = debug;
+}
+
+// Current debug level, as set by set_debug (0 initially).
+int
+gc_job_manager_impl::debug()
+{
+  const int level = d_debug;
+  return level;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * If logging is enabled, create one log file per SPE ("spu_log.NN" in the
+ * current directory), map it into memory and record the mapping in that
+ * SPE's spu_args so the SPE can write log entries into it.
+ *
+ * The number of entries is 2^log2_nlog_entries (12 if unset), rounded up
+ * so that the mapping is a whole number of pages.
+ *
+ * On any failure a message is printed and setup stops; SPEs that were
+ * already set up keep their log, the remaining ones run without one.
+ */
+void
+gc_job_manager_impl::setup_logfiles()
+{
+  if (!d_options.enable_logging)
+    return;
+
+  if (d_options.log2_nlog_entries == 0)
+    d_options.log2_nlog_entries = 12;
+
+  // must end up a multiple of the page size
+
+  size_t pagesize = getpagesize();
+  // shift in size_t so that large exponents don't overflow an int
+  size_t s = ((size_t) 1 << d_options.log2_nlog_entries) * sizeof(gc_log_entry_t);
+  s = ((s + pagesize - 1) / pagesize) * pagesize;
+  size_t nentries = s / sizeof(gc_log_entry_t);
+  assert(is_power_of_2(nentries));
+
+  for (unsigned int i = 0; i < d_options.nspes; i++){
+    char filename[100];
+    snprintf(filename, sizeof(filename), "spu_log.%02d", i);
+    int fd = open(filename, O_CREAT|O_TRUNC|O_RDWR, 0664);
+    if (fd == -1){
+      perror(filename);
+      return;
+    }
+
+    // Give the (freshly truncated) file its full size before mapping it.
+    // Touching a mapping that extends past the end of the file raises
+    // SIGBUS, so a failure here must not be ignored.
+    if (ftruncate(fd, (off_t) s) != 0){
+      perror(filename);
+      close(fd);
+      return;
+    }
+
+    void *p = mmap(0, s, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    if (p == MAP_FAILED){
+      perror("gc_job_manager_impl::setup_logfiles: mmap");
+      close(fd);
+      return;
+    }
+    close(fd);          // the mapping stays valid after the close
+    memset(p, 0, s);
+    d_spu_args[i].log.base = ptr_to_ea(p);
+    d_spu_args[i].log.nentries = nentries;
+  }
+}
+
+// Schedule an asynchronous write-back (msync with MS_ASYNC) of every
+// mapped SPE log.
+void
+gc_job_manager_impl::sync_logfiles()
+{
+  for (unsigned int spe = 0; spe < d_options.nspes; spe++){
+    if (!d_spu_args[spe].log.base)
+      continue;
+
+    size_t nbytes = d_spu_args[spe].log.nentries * sizeof(gc_log_entry_t);
+    msync(ea_to_ptr(d_spu_args[spe].log.base), nbytes, MS_ASYNC);
+  }
+}
+
+/*
+ * Unmap every SPE log that setup_logfiles mapped.
+ *
+ * The log description in spu_args is cleared afterwards, so calling this
+ * again, or calling sync_logfiles later, won't touch the stale mapping.
+ */
+void
+gc_job_manager_impl::unmap_logfiles()
+{
+  for (unsigned int i = 0; i < d_options.nspes; i++){
+    if (!d_spu_args[i].log.base)
+      continue;
+
+    if (munmap(ea_to_ptr(d_spu_args[i].log.base),
+               d_spu_args[i].log.nentries * sizeof(gc_log_entry_t)) != 0)
+      perror("gc_job_manager_impl::unmap_logfiles: munmap");
+
+    d_spu_args[i].log.base = 0;
+    d_spu_args[i].log.nentries = 0;
+  }
+}
+
+////////////////////////////////////////////////////////////////////////
+//
+// lookup proc names in d_proc_def table
+
+// Return the index in d_proc_def of the entry named proc_name.
+// Throws gc_unknown_proc if there is no such entry.
+gc_proc_id_t
+gc_job_manager_impl::lookup_proc(const std::string &proc_name)
+{
+  for (int idx = 0; idx < d_nproc_defs; idx++){
+    if (proc_name.compare(d_proc_def[idx].name) == 0)
+      return idx;
+  }
+
+  throw gc_unknown_proc(proc_name);
+}
+
+// Return the names of all entries in d_proc_def, in table order.
+std::vector<std::string>
+gc_job_manager_impl::proc_names()
+{
+  std::vector<std::string> names;
+
+  for (int idx = 0; idx < d_nproc_defs; idx++){
+    std::string name(d_proc_def[idx].name);
+    names.push_back(name);
+  }
+
+  return names;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+// Destroy the SPE context, if there is one, and mark the worker unused.
+// A failure to destroy the context is reported but otherwise ignored.
+worker_ctx::~worker_ctx()
+{
+  if (spe_ctx != 0){
+    if (spe_context_destroy(spe_ctx) != 0)
+      perror("spe_context_destroy");
+
+    spe_ctx = 0;
+  }
+
+  state = WS_FREE;
+}
diff --git a/gcell/lib/runtime/gc_job_manager_impl.h b/gcell/lib/runtime/gc_job_manager_impl.h
new file mode 100644
index 000000000..dad7873ab
--- /dev/null
+++ b/gcell/lib/runtime/gc_job_manager_impl.h
@@ -0,0 +1,252 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GC_JOB_MANAGER_IMPL_H
+#define INCLUDED_GC_JOB_MANAGER_IMPL_H
+
+#include <gcell/gc_job_manager.h>
+#include <gcell/gc_jd_stack.h>
+#include <gcell/gc_jd_queue.h>
+#include <gcell/gc_spu_args.h>
+#include "gc_client_thread_info.h"
+#include <libspe2.h>
+#include <vector>
+#include <boost/scoped_array.hpp>
+
+typedef boost::shared_ptr<spe_gang_context> spe_gang_context_sptr;
+typedef boost::shared_ptr<spe_program_handle_t> spe_program_handle_sptr;
+typedef boost::scoped_array<gc_client_thread_info> gc_client_thread_info_sa;
+
+
+enum worker_state {	// lifecycle state recorded in worker_ctx::state
+  WS_FREE,	// not in use (constructor default; restored by ~worker_ctx)
+  WS_INIT,	// allocated and being initialized
+  WS_RUNNING,	// the thread is running
+  WS_DEAD,	// the thread is dead
+};
+
+struct worker_ctx {			// per-SPE bookkeeping; gc_job_manager_impl keeps MAX_SPES of these
+  volatile worker_state	state;		// current lifecycle state (see worker_state)
+  unsigned int		spe_idx;  	// [0, nspes-1]
+  spe_context_ptr_t	spe_ctx;	// libspe2 context; destroyed by the destructor when non-zero
+  pthread_t		thread;		// NOTE(review): presumably the thread driving this SPE -- confirm
+  gc_spu_args_t		*spu_args;	// pointer to 16-byte aligned struct
+
+  worker_ctx()				// starts out WS_FREE with every other field zeroed
+    : state(WS_FREE), spe_idx(0), spe_ctx(0),
+      thread(0), spu_args(0) {}
+  ~worker_ctx();			// destroys spe_ctx (if any) and resets state to WS_FREE
+};
+
+enum evt_handler_state {	// state of the event handler thread (gc_job_manager_impl::d_eh_state)
+  EHS_INIT,		// being initialized
+  EHS_RUNNING,		// thread is running
+  EHS_SHUTTING_DOWN,	// in process of shutting down everything
+  EHS_WAITING_FOR_WORKERS_TO_DIE,	// NOTE(review): presumably shutdown is pending on worker exit -- confirm
+  EHS_DEAD,		// thread is dead
+};
+
+struct spe_event_handler {	// holds a libspe2 event handler and destroys it on destruction
+  spe_event_handler_ptr_t	ptr;
+
+  spe_event_handler() : ptr(0) {}
+  ~spe_event_handler(){
+    // Nothing to release unless a handler was actually stored in ptr.
+    if (ptr == 0)
+      return;
+    if (spe_event_handler_destroy(ptr) != 0)
+      perror("spe_event_handler_destroy");
+  }
+};
+
+
+/*!
+ * \brief Concrete class that manages SPE jobs.
+ *
+ * This class contains all the implementation details.
+ */
+class gc_job_manager_impl : public gc_job_manager
+{
+  enum { MAX_SPES =  16 };	// capacity of d_worker[]
+
+  int			  d_debug;
+  gc_jm_options		  d_options;
+  spe_program_handle_sptr d_spe_image;
+  spe_gang_context_sptr   d_gang;		// boost::shared_ptr
+
+  worker_ctx 		 d_worker[MAX_SPES];	// SPE ctx, thread, etc
+  gc_spu_args_t		*d_spu_args;		// 16-byte aligned structs
+  boost::shared_ptr<void> _d_spu_args_boost;	// hack for automatic storage mgmt
+
+  gc_comp_info_t	*d_comp_info;		// 128-byte aligned structs
+  boost::shared_ptr<void> _d_comp_info_boost;	// hack for automatic storage mgmt
+
+  // used to coordinate communication w/ the event handling thread
+  omni_mutex		 d_eh_mutex;
+  omni_condition	 d_eh_cond;
+  pthread_t		 d_eh_thread;		// the event handler thread
+  volatile evt_handler_state	d_eh_state;
+  volatile bool		 	d_shutdown_requested;
+  spe_event_handler	 d_spe_event_handler;
+  
+
+  // All of the job descriptors are hung off of here.
+  // We allocate them all in a single cache aligned chunk.
+  gc_job_desc_t		*d_jd;			// [options.max_jobs]
+  boost::shared_ptr<void> _d_jd_boost;		// hack for automatic storage mgmt
+
+  gc_client_thread_info_sa d_client_thread;	// [options.max_client_threads]
+
+  // We use bitvectors to represent the completion state of a job.  Each
+  // bitvector is d_bvlen longs in length.
+  int			 d_bvlen;		// bit vector length in longs
+
+  // This contains the storage for all the bitvectors used by the job
+  // manager.  There's 1 for each client thread, in the d_jobs_done
+  // field.  We allocate them all in a single cache aligned chunk.
+  boost::shared_ptr<void> _d_all_bitvectors;	// hack for automatic storage mgmt
+
+  // Lock free stack where we keep track of the free job descriptors.
+  gc_jd_stack_t		*d_free_list;		// stack of free job descriptors
+  boost::shared_ptr<void> _d_free_list_boost;	// hack for automatic storage mgmt
+
+  // The PPE inserts jobs here; SPEs pull jobs from here.
+  gc_jd_queue_t	 	*d_queue;		// job queue
+  boost::shared_ptr<void> _d_queue_boost;	// hack for automatic storage mgmt
+
+  int			 d_ea_args_maxsize;
+
+  struct gc_proc_def	*d_proc_def;		// the SPE procedure table
+  uint32_t		 d_proc_def_ls_addr;	// the LS address of the table
+  int			 d_nproc_defs;		// number of proc_defs in table
+
+  gc_client_thread_info *alloc_cti();
+  void free_cti(gc_client_thread_info *cti);
+
+  void create_event_handler();
+  void set_eh_state(evt_handler_state s);
+  void set_ea_args_maxsize(int maxsize);
+
+  void notify_clients_jobs_are_done(unsigned int spe_num,
+				    unsigned int completion_info_idx);
+
+public:
+  void event_handler_loop();	// really private
+
+private:
+  bool send_all_spes(uint32_t msg);
+  bool send_spe(unsigned int spe, uint32_t msg);
+  void print_event(spe_event_unit_t *evt);
+  void handle_event(spe_event_unit_t *evt);
+
+  // bitvector ops
+  void bv_zero(unsigned long *bv);
+  void bv_clr(unsigned long *bv, unsigned int bitno);
+  void bv_set(unsigned long *bv, unsigned int bitno);
+  bool bv_isset(unsigned long *bv, unsigned int bitno);
+  bool bv_isclr(unsigned long *bv, unsigned int bitno);
+
+  void setup_logfiles();
+  void sync_logfiles();
+  void unmap_logfiles();
+
+  friend gc_job_manager_sptr gc_make_job_manager(const gc_jm_options *options);
+  
+  gc_job_manager_impl(const gc_jm_options *options = 0);
+
+public:
+  virtual ~gc_job_manager_impl();
+
+  /*!
+   * Stop accepting new jobs.  Wait for existing jobs to complete.
+   * Return all managed SPE's to the system.
+   */
+  virtual bool shutdown();
+
+  /*!
+   * \brief Return number of SPE's currently allocated to job manager.
+   */
+  virtual int nspes() const;
+
+  /*!
+   * \brief Return a pointer to a properly aligned job descriptor,
+   * or zero if none are available.
+   */
+  virtual gc_job_desc *alloc_job_desc();
+
+  /*!
+   * \brief Return a job descriptor previously allocated with alloc_job_desc()
+   *
+   * \param[in] jd pointer to job descriptor to free.
+   */
+  virtual void free_job_desc(gc_job_desc *jd);
+
+  /*!
+   * \brief Submit a job for asynchronous processing on an SPE.
+   *
+   * \param[in] jd pointer to job description
+   *
+   * The caller must not read or write the job description
+   * or any of the memory associated with any indirect arguments
+   * until after calling wait_job.
+   *
+   * \returns true iff the job was successfully enqueued.
+   * If submit_job returns false, check jd->status for additional info.
+   */
+  virtual bool submit_job(gc_job_desc *jd);
+
+  /*!
+   * \brief Wait for job to complete.
+   *
+   * A thread may only wait for jobs which it submitted.
+   *
+   * \returns true if successful, else false.
+   */
+  virtual bool 
+  wait_job(gc_job_desc *jd);
+
+  /*!
+   * \brief wait for 1 or more jobs to complete.
+   *
+   * \param[in] njobs is the length of arrays \p jd and \p done.
+   * \param[in] jd are the jobs that are to be waited for.
+   * \param[out] done indicates whether the corresponding job is complete.
+   * \param[in] mode indicates whether to wait for ALL or ANY of the jobs
+   *   in \p jd to complete.
+   *
+   * A thread may only wait for jobs which it submitted.
+   *
+   * \returns number of jobs completed, or -1 if error.
+   */
+  virtual int
+  wait_jobs(unsigned int njobs,
+	    gc_job_desc *jd[], bool done[], gc_wait_mode mode);
+
+  virtual int ea_args_maxsize();
+
+  virtual gc_proc_id_t lookup_proc(const std::string &name);
+  virtual std::vector<std::string> proc_names();
+
+  virtual void set_debug(int debug);
+  virtual int debug();
+};
+
+#endif /* INCLUDED_GC_JOB_MANAGER_IMPL_H */
diff --git a/gcell/lib/runtime/gc_proc_def_utils.cc b/gcell/lib/runtime/gc_proc_def_utils.cc
new file mode 100644
index 000000000..0250d677a
--- /dev/null
+++ b/gcell/lib/runtime/gc_proc_def_utils.cc
@@ -0,0 +1,123 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <gc_proc_def_utils.h>
+#include <gcell/gc_declare_proc.h>
+#include <elf.h>
+#include <stdio.h>
+#include <string.h>
+
+static const unsigned char expected[EI_PAD] = {	// e_ident prefix that gcpd_find_table insists on
+  ELFMAG0,		// the four ELF magic bytes ...
+  ELFMAG1,
+  ELFMAG2,
+  ELFMAG3,
+  ELFCLASS32,		// 32-bit object
+  ELFDATA2MSB,		// big-endian data encoding
+  EV_CURRENT,		// current ELF version
+  ELFOSABI_SYSV,	// System V OS ABI
+  0			// ABI version byte
+};
+
+
+/*
+ * Find the GC_PROC_DEF_SECTION section in the SPE ELF image and hand back
+ * a pointer to it, its entry count and its Local Store address.  The only
+ * things in that section are gc_proc_def's.  All four pointers are required;
+ * once they check out, the outputs stay zeroed unless we return true.
+ */
+bool 
+gcpd_find_table(spe_program_handle_t *handle,
+		struct gc_proc_def **table, int *nentries, uint32_t *ls_addr)
+{
+  if (!handle || !table || !nentries || !ls_addr)	// ls_addr is written below as well
+    return false;
+
+  *table = 0;
+  *nentries = 0;
+  *ls_addr = 0;
+  Elf32_Ehdr *ehdr = (Elf32_Ehdr *)handle->elf_image;
+  if (!ehdr){
+    fprintf(stderr, "gcpd: No ELF image has been loaded\n");
+    return false;
+  }
+
+  // quick check that we're looking at a SPE EXEC object
+
+  if (memcmp(ehdr->e_ident, expected, EI_PAD) != 0){
+    fprintf(stderr, "gcpd: invalid ELF header\n");
+    return false;
+  }
+
+  if (ehdr->e_machine != 0x17){		// confirm machine type (EM_SPU)
+    fprintf(stderr, "gcpd: not an SPE ELF object\n");
+    return false;
+  }
+
+  if (ehdr->e_type != ET_EXEC){
+    fprintf(stderr, "gcpd: invalid SPE ELF type.\n");
+    fprintf(stderr, "gcpd: SPE type %d != %d\n", ehdr->e_type, ET_EXEC);
+    return false;
+  }
+
+  // find the section header table
+
+  Elf32_Shdr *shdr;
+  Elf32_Shdr *sh;
+
+  if (ehdr->e_shentsize != sizeof (*shdr)){
+    fprintf(stderr, "gcpd: invalid section header format.\n");
+    return false;
+  }
+
+  if (ehdr->e_shnum == 0){
+    fprintf(stderr, "gcpd: no section headers in file.\n");
+    return false;
+  }
+
+  if (ehdr->e_shstrndx >= ehdr->e_shnum){	// string table index must name a real section
+    fprintf(stderr, "gcpd: invalid section name string table index.\n");
+    return false;
+  }
+
+  shdr = (Elf32_Shdr *) ((char *)ehdr + ehdr->e_shoff);
+  char *str_table = (char *)ehdr + shdr[ehdr->e_shstrndx].sh_offset;
+
+  // traverse the sections looking for GC_PROC_DEF_SECTION
+  
+  for (sh = shdr; sh < &shdr[ehdr->e_shnum]; sh++){
+    if (strcmp(GC_PROC_DEF_SECTION, str_table+sh->sh_name) == 0){
+      if (sh->sh_size % (sizeof(struct gc_proc_def)) != 0){
+	fprintf(stderr, "gcpd: %s section has invalid format\n", GC_PROC_DEF_SECTION);
+	return false;
+      }
+      *table = (struct gc_proc_def *)((char *)ehdr + sh->sh_offset);	// published only once validated
+      *nentries = sh->sh_size / sizeof(struct gc_proc_def);
+      *ls_addr = sh->sh_addr;
+      return true;
+    }
+  }
+
+  return false;
+}
diff --git a/gcell/lib/runtime/gc_proc_def_utils.h b/gcell/lib/runtime/gc_proc_def_utils.h
new file mode 100644
index 000000000..79c9b51aa
--- /dev/null
+++ b/gcell/lib/runtime/gc_proc_def_utils.h
@@ -0,0 +1,42 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GC_PROC_DEF_UTILS_H
+#define INCLUDED_GC_PROC_DEF_UTILS_H
+
+#include <gcell/gc_declare_proc.h>
+#include <libspe2.h>
+
+/*!
+ * \brief find the gc_proc_def table in the SPE program
+ *
+ * \param[in]  program is the handle to the loaded SPE program
+ * \param[out] table points to the table, if it's found
+ * \param[out] nentries is set to the number of entries in the table.
+ * \param[out] ls_addr is set to the Local Store address of the table
+ *
+ * \returns true if successful, else false
+ */
+bool
+gcpd_find_table(spe_program_handle_t *program,
+		struct gc_proc_def **table, int *nentries, uint32_t *ls_addr);
+
+
+#endif /* INCLUDED_GC_PROC_DEF_UTILS_H */
diff --git a/gcell/lib/runtime/gcell-embedspu-libtool b/gcell/lib/runtime/gcell-embedspu-libtool
new file mode 100755
index 000000000..c206086a4
--- /dev/null
+++ b/gcell/lib/runtime/gcell-embedspu-libtool
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Take a spu executable and turn it into a libtool compatible .lo (and .o) file.
+# This is needed when you want to embed a SPU executable into a shared library.
+#
+# The symbol assigned to the embedded executable is the basename of the
+# output file with an _spx appended.  E.g., if the output filename is
+# my_spe_tricks.lo the symbol name is my_spe_tricks_spx.
+# ("_spx" stands for SPE executable)
+
+if [ $# -ne 2 ]; then
+  echo "usage: gcell-embedspu-libtool spu_executable output_file.lo " 1>&2
+  exit 1
+fi
+
+spu_executable=$1
+lo_file=$2
+symbol_name=${lo_file%%.lo}_spx
+
+# make .libs if it's not already there
+mkdir -p .libs
+
+# generate the .o file that wraps the SPU executable (give up if that fails)
+ppu-embedspu -m32 -fpic "${symbol_name}" "${spu_executable}" ".libs/${symbol_name}.o" || exit 1
+
+# generate the .lo libtool file that points at all the right places
+rm -f "$lo_file"
+cat >"$lo_file.new" <<EOF
+# $lo_file - a libtool object file
+# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06)
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+pic_object='.libs/${symbol_name}.o'
+non_pic_object=none
+EOF
+
+mv "$lo_file.new" "$lo_file"
+
diff --git a/gcell/lib/runtime/qa_gcell_runtime.cc b/gcell/lib/runtime/qa_gcell_runtime.cc
new file mode 100644
index 000000000..fef9a7fb4
--- /dev/null
+++ b/gcell/lib/runtime/qa_gcell_runtime.cc
@@ -0,0 +1,43 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * This class gathers together all the test cases for the lib
+ * directory into a single test suite.  As you create new test cases,
+ * add them here.
+ */
+
+#include <qa_gcell_runtime.h>
+#include <qa_jd_stack.h>
+#include <qa_jd_queue.h>
+#include <qa_job_manager.h>
+
+CppUnit::TestSuite *
+qa_gcell_runtime::suite()
+{
+  // Build the "runtime" suite; the sub-suites are registered in this order.
+  CppUnit::TestSuite *runtime_suite = new CppUnit::TestSuite("runtime");
+  runtime_suite->addTest(qa_jd_stack::suite());
+  runtime_suite->addTest(qa_jd_queue::suite());
+  runtime_suite->addTest(qa_job_manager::suite());
+
+  return runtime_suite;
+}
diff --git a/gcell/lib/runtime/qa_gcell_runtime.h b/gcell/lib/runtime/qa_gcell_runtime.h
new file mode 100644
index 000000000..36180c919
--- /dev/null
+++ b/gcell/lib/runtime/qa_gcell_runtime.h
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_GCELL_RUNTIME_H
+#define INCLUDED_QA_GCELL_RUNTIME_H
+
+#include <cppunit/TestSuite.h>
+
+//! Collects all the tests for the runtime directory into a single CppUnit suite.
+
+class qa_gcell_runtime {
+public:
+  //! return a newly allocated suite holding every runtime sub-suite
+  static CppUnit::TestSuite *suite();
+};
+
+
+#endif /* INCLUDED_QA_GCELL_RUNTIME_H */
diff --git a/gcell/lib/runtime/qa_jd_queue.cc b/gcell/lib/runtime/qa_jd_queue.cc
new file mode 100644
index 000000000..35ab9d82f
--- /dev/null
+++ b/gcell/lib/runtime/qa_jd_queue.cc
@@ -0,0 +1,78 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qa_jd_queue.h"
+#include <cppunit/TestAssert.h>
+#include <gcell/gc_jd_queue.h>
+#include <stdio.h>
+
+
+
+static const int NJDS = 16;		// how many job descriptors t1 pushes through the queue
+static gc_jd_queue_t queue;		// file scope, not a local: see the N.B. in t1
+static gc_job_desc_t jds[NJDS];
+
+// no brainer, single threaded basic checkout
+void
+qa_jd_queue::t1()
+{
+  // N.B., stack allocated stuff doesn't obey ((aligned (N))) attributes
+  //const int NJDS = 8;
+  //gc_jd_queue_t queue;
+  //gc_job_desc_t jds[NJDS];
+
+  //printf("&queue   = %p\n", &queue);
+  //printf("&jds[0] = %p\n", &jds[0]);
+  //printf("&jds[1] = %p\n", &jds[1]);
+
+  CPPUNIT_ASSERT(((uintptr_t) &queue & 0x7f) == 0);	// low 7 bits clear: 128-byte aligned
+  CPPUNIT_ASSERT(((uintptr_t) &jds[0] & 0x7f) == 0);
+  CPPUNIT_ASSERT(((uintptr_t) &jds[1] & 0x7f) == 0);
+
+  gc_jd_queue_init(&queue);
+
+  CPPUNIT_ASSERT(gc_jd_queue_dequeue(&queue) == 0);	// an empty queue dequeues to 0
+
+  gc_jd_queue_enqueue(&queue, &jds[0]);			// one in, the same one out
+  CPPUNIT_ASSERT_EQUAL(&jds[0], gc_jd_queue_dequeue(&queue));
+
+  CPPUNIT_ASSERT(gc_jd_queue_dequeue(&queue) == 0);
+
+  for (int i = 0; i < NJDS; i++)
+    gc_jd_queue_enqueue(&queue, &jds[i]);
+
+  for (int i = 0; i < NJDS; i++)			// expected back in insertion (FIFO) order
+    CPPUNIT_ASSERT_EQUAL(&jds[i], gc_jd_queue_dequeue(&queue));
+
+  CPPUNIT_ASSERT(gc_jd_queue_dequeue(&queue) == 0);	// and the queue is empty again
+}
+
+// FIXME placeholder: the multithreaded test (running on PPE) is not written yet
+void
+qa_jd_queue::t2()
+{
+}
+
+// FIXME placeholder: the multithreaded test (running on PPE & SPE) is not written yet
+void
+qa_jd_queue::t3()
+{
+}
diff --git a/gcell/lib/runtime/qa_jd_queue.h b/gcell/lib/runtime/qa_jd_queue.h
new file mode 100644
index 000000000..5e1aab84f
--- /dev/null
+++ b/gcell/lib/runtime/qa_jd_queue.h
@@ -0,0 +1,42 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_JD_QUEUE_H
+#define INCLUDED_QA_JD_QUEUE_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_jd_queue : public CppUnit::TestCase {	// CppUnit test case with three registered tests
+
+  CPPUNIT_TEST_SUITE(qa_jd_queue);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST(t2);
+  CPPUNIT_TEST(t3);
+  CPPUNIT_TEST_SUITE_END();
+
+ private:
+  void t1();	// single threaded basic checkout
+  void t2();	// multithreaded on the PPE (FIXME: still to be written)
+  void t3();	// multithreaded on PPE & SPE (FIXME: still to be written)
+};
+
+
+#endif /* INCLUDED_QA_JD_QUEUE_H */
diff --git a/gcell/lib/runtime/qa_jd_stack.cc b/gcell/lib/runtime/qa_jd_stack.cc
new file mode 100644
index 000000000..ce4ce2d0d
--- /dev/null
+++ b/gcell/lib/runtime/qa_jd_stack.cc
@@ -0,0 +1,67 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qa_jd_stack.h"
+#include <cppunit/TestAssert.h>
+#include <gcell/gc_jd_stack.h>
+#include <stdio.h>
+
+
+
+static const int NJDS = 8;		// how many job descriptors t1 pushes onto the stack
+static gc_jd_stack_t stack;		// file scope, not a local: see the N.B. in t1
+static gc_job_desc_t jds[NJDS];
+
+// no brainer, single threaded basic checkout
+void
+qa_jd_stack::t1()
+{
+  // N.B., stack allocated stuff doesn't obey ((aligned (N))) attributes
+  //const int NJDS = 8;
+  //gc_jd_stack_t stack;
+  //gc_job_desc_t jds[NJDS];
+
+  //printf("&stack   = %p\n", &stack);
+  //printf("&jds[0] = %p\n", &jds[0]);
+  //printf("&jds[1] = %p\n", &jds[1]);
+
+  CPPUNIT_ASSERT(((uintptr_t) &stack & 0x7f) == 0);	// low 7 bits clear: 128-byte aligned
+  CPPUNIT_ASSERT(((uintptr_t) &jds[0] & 0x7f) == 0);
+  CPPUNIT_ASSERT(((uintptr_t) &jds[1] & 0x7f) == 0);
+
+  gc_jd_stack_init(&stack);
+
+  CPPUNIT_ASSERT(gc_jd_stack_pop(&stack) == 0);		// an empty stack pops to 0
+
+  for (int i = 0; i < NJDS; i++)
+    gc_jd_stack_push(&stack, &jds[i]);
+
+  for (int i = 0; i < NJDS; i++)			// expected back in reverse (LIFO) order
+    CPPUNIT_ASSERT_EQUAL(&jds[NJDS - i - 1], gc_jd_stack_pop(&stack));
+
+  CPPUNIT_ASSERT(gc_jd_stack_pop(&stack) == 0);		// and the stack is empty again
+}
+
+// FIXME placeholder: the multithreaded test (running on PPE) is not written yet
+void
+qa_jd_stack::t2()
+{
+}
diff --git a/gcell/lib/runtime/qa_jd_stack.h b/gcell/lib/runtime/qa_jd_stack.h
new file mode 100644
index 000000000..1546bbf9f
--- /dev/null
+++ b/gcell/lib/runtime/qa_jd_stack.h
@@ -0,0 +1,42 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_JD_STACK_H
+#define INCLUDED_QA_JD_STACK_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_jd_stack : public CppUnit::TestCase {	// CppUnit test case with two registered tests
+
+  CPPUNIT_TEST_SUITE(qa_jd_stack);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST(t2);
+  CPPUNIT_TEST_SUITE_END();
+
+ private:
+  void t1();	// single threaded basic checkout
+  void t2();	// multithreaded on the PPE (FIXME: still to be written)
+
+};
+
+
+
+#endif /* INCLUDED_QA_JD_STACK_H */
diff --git a/gcell/lib/runtime/qa_job_manager.cc b/gcell/lib/runtime/qa_job_manager.cc
new file mode 100644
index 000000000..45cf26cfc
--- /dev/null
+++ b/gcell/lib/runtime/qa_job_manager.cc
@@ -0,0 +1,790 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qa_job_manager.h"
+#include <cppunit/TestAssert.h>
+#include <gcell/gc_job_manager.h>
+#include <stdexcept>
+#include <stdio.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+
+#include <malloc.h>
+
+// handle to embedded SPU executable w/ QA routines
+extern spe_program_handle_t gcell_runtime_qa_spx;
+
+#if 0
+static void
+gc_msleep(unsigned long millisecs)	// sleep for millisecs ms; compiled out by the #if 0
+{
+  int r;
+  struct timespec tv;
+  tv.tv_sec = millisecs / 1000;
+  tv.tv_nsec = (millisecs - (tv.tv_sec * 1000)) * 1000000;	// leftover ms expressed in ns
+  
+  while (1){
+    r = nanosleep(&tv, &tv);		// tv is also passed as the "remaining time" argument
+    if (r == 0)
+      return;
+    if (r == -1 && errno == EINTR)	// interrupted: go round again with the updated tv
+      continue;
+    perror("nanosleep");
+    return;
+  }
+}
+#endif
+
+void
+qa_job_manager::leak_check(test_t t, const std::string &name)
+{
+  // Run test t between two mallinfo() snapshots and report any change in
+  // uordblks.  The report is printed only; nothing is asserted here.
+  const struct mallinfo before = mallinfo();
+  (this->*t)();
+  const struct mallinfo after = mallinfo();
+
+  if (after.uordblks == before.uordblks)
+    return;
+
+  std::cout << name << " leaked memory\n";
+  printf("  before.uordblks = %6d\n", before.uordblks);
+  printf("  after.uordblks  = %6d\n",  after.uordblks);
+  printf("  delta = %d\n", after.uordblks - before.uordblks);
+}
+
+void
+qa_job_manager::t0()
+{
+  //leak_check(&qa_job_manager::t1_body, "t1-0");	// disabled, so t0 currently does nothing
+}
+
+void
+qa_job_manager::t1()
+{
+  t1_body();		// leaks 800 bytes first time, could be one-time inits
+  leak_check(&qa_job_manager::t1_body, "t1");	// second run is the one that gets measured
+}
+
+void
+qa_job_manager::t2()
+{
+  leak_check(&qa_job_manager::t2_body, "t2");	// t2 and t4..t15: run tN_body under leak_check
+}
+
+void
+qa_job_manager::t3()
+{
+  t3_body();		// leaks first time only, could be cppunit
+  leak_check(&qa_job_manager::t3_body, "t3");
+}
+
+void
+qa_job_manager::t4()
+{
+  leak_check(&qa_job_manager::t4_body, "t4");
+}
+
+void
+qa_job_manager::t5()
+{
+  leak_check(&qa_job_manager::t5_body, "t5");
+}
+
+void
+qa_job_manager::t6()
+{
+  leak_check(&qa_job_manager::t6_body, "t6");
+}
+
+void
+qa_job_manager::t7()
+{
+  leak_check(&qa_job_manager::t7_body, "t7");
+}
+
+void
+qa_job_manager::t8()
+{
+  leak_check(&qa_job_manager::t8_body, "t8");
+}
+
+void
+qa_job_manager::t9()
+{
+  leak_check(&qa_job_manager::t9_body, "t9");
+}
+
+void
+qa_job_manager::t10()
+{
+  leak_check(&qa_job_manager::t10_body, "t10");
+}
+
+void
+qa_job_manager::t11()
+{
+  leak_check(&qa_job_manager::t11_body, "t11");
+}
+
+void
+qa_job_manager::t12()
+{
+  leak_check(&qa_job_manager::t12_body, "t12");
+}
+
+void
+qa_job_manager::t13()
+{
+  leak_check(&qa_job_manager::t13_body, "t13");
+}
+
+void
+qa_job_manager::t14()
+{
+  leak_check(&qa_job_manager::t14_body, "t14");
+}
+
+void
+qa_job_manager::t15()
+{
+  leak_check(&qa_job_manager::t15_body, "t15");
+}
+
+// ----------------------------------------------------------------
+
+void
+qa_job_manager::t1_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;			// only program_handle is set; the rest keep their defaults
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  mgr = gc_make_job_manager(&opts);	// just create a manager and let it go out of scope
+}
+
+void
+qa_job_manager::t2_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 100;		// NOTE(review): presumably more SPEs than any machine has -- confirm
+  opts.gang_schedule = false;	// the gang_schedule = true variant lives (disabled) in t3_body
+  mgr = gc_make_job_manager(&opts);
+}
+
+void
+qa_job_manager::t3_body()
+{
+  // This leaks memory the first time it's invoked, but I'm not sure
+  // if it's us or the underlying exception handling mechanism, or
+  // cppunit.  cppunit is the prime suspect.
+
+#if 0	// disabled, so this body is currently empty
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 100;
+  opts.gang_schedule = true;
+  CPPUNIT_ASSERT_THROW(mgr = gc_make_job_manager(&opts), std::out_of_range);
+#endif
+}
+
+static void
+init_jd(gc_job_desc *jd, gc_proc_id_t proc_id)
+{
+  // Reset jd to "run proc_id with no arguments of any kind".
+  jd->eaa.nargs = 0;
+  jd->input.nargs = jd->output.nargs = 0;
+  jd->proc_id = proc_id;
+}
+
+void
+qa_job_manager::t4_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;	// a single SPE runs all NJOBS qa_nop jobs
+  mgr = gc_make_job_manager(&opts);
+  //mgr->set_debug(-1);
+  static const int NJOBS = 32;
+  gc_job_desc *jds[NJOBS];
+  bool done[NJOBS];
+
+  gc_proc_id_t gcp_no_such;	// an unknown name must throw rather than return an id
+  CPPUNIT_ASSERT_THROW(gcp_no_such = mgr->lookup_proc("--no-such-proc-name--"), gc_unknown_proc);
+
+  gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop");
+  CPPUNIT_ASSERT(gcp_qa_nop != GCP_UNKNOWN_PROC);
+
+  for (int i = 0; i < NJOBS; i++){
+    jds[i] = mgr->alloc_job_desc();
+    CPPUNIT_ASSERT(jds[i] != 0);	// alloc_job_desc returns zero when none are available
+    init_jd(jds[i], gcp_qa_nop);
+  }
+  for (int i = 0; i < NJOBS; i++){
+    if (!mgr->submit_job(jds[i])){
+      printf("%d: submit_job(jds[%d]) failed, status = %d\n",
+	     __LINE__, i, jds[i]->status);
+    }
+  }
+
+  int n = mgr->wait_jobs(NJOBS, jds, done, GC_WAIT_ALL);	// block until every job is done
+  CPPUNIT_ASSERT_EQUAL(NJOBS, n);
+
+  for (int i = 0; i < NJOBS; i++){
+    mgr->free_job_desc(jds[i]);
+  }
+}
+
+void
+qa_job_manager::t5_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 0;	// use them all
+  mgr = gc_make_job_manager(&opts);
+  //mgr->set_debug(-1);
+  static const int NJOBS = 32;
+  gc_job_desc *jds[NJOBS];
+  bool done[NJOBS];
+
+  gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop");
+
+  for (int i = 0; i < NJOBS; i++){
+    jds[i] = mgr->alloc_job_desc();
+    CPPUNIT_ASSERT(jds[i] != 0);	// alloc_job_desc returns zero when none are available
+    init_jd(jds[i], gcp_qa_nop);
+  }
+  for (int i = 0; i < NJOBS; i++){
+    if (!mgr->submit_job(jds[i])){
+      printf("%d: submit_job(jds[%d]) failed, status = %d\n",
+	     __LINE__, i, jds[i]->status);
+    }
+  }
+
+  int n = mgr->wait_jobs(NJOBS, jds, done, GC_WAIT_ALL);	// block until every job is done
+  CPPUNIT_ASSERT_EQUAL(NJOBS, n);
+
+  for (int i = 0; i < NJOBS; i++){
+    mgr->free_job_desc(jds[i]);
+  }
+}
+
+void
+qa_job_manager::t6_body()	// check jd->status for a valid proc and for a bogus proc id
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;	
+  mgr = gc_make_job_manager(&opts);
+  gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop");
+  gc_job_desc *jd = mgr->alloc_job_desc();	// one descriptor, reused for both jobs
+
+  
+  // test for success with gcp_qa_nop procedure
+  init_jd(jd, gcp_qa_nop);
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd->status);
+  }
+
+  // test for JS_UNKNOWN_PROC with bogus procedure
+  init_jd(jd, -2);	// -2 is used as a proc id that should not exist
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_UNKNOWN_PROC, jd->status);
+  }
+
+  mgr->free_job_desc(jd);
+}
+
+// Reference sum: add up the nshorts values starting at p, accumulating in an int.
+static int sum_shorts(short *p, int nshorts)
+{
+  int acc = 0;
+  for (int left = nshorts; left > 0; left--)
+    acc += *p++;
+
+  return acc;
+}
+
+static void
+test_sum_shorts(gc_job_manager_sptr mgr, short *buf, int nshorts)	// run "qa_sum_shorts" on buf, check its sum
+{
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_sum_shorts = mgr->lookup_proc("qa_sum_shorts");
+  // a single "get" EA argument describing buf[0 .. nshorts-1]
+  init_jd(jd, gcp_qa_sum_shorts);
+  jd->eaa.nargs = 1;
+  jd->eaa.arg[0].ea_addr = ptr_to_ea(buf);
+  jd->eaa.arg[0].direction = GCJD_DMA_GET;
+  jd->eaa.arg[0].get_size = nshorts * sizeof(short);	// size in bytes
+  
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd->status);
+    int expected = sum_shorts(buf, nshorts);	// sum computed locally
+    int actual = jd->output.arg[0].s32;		// value the job left in output arg 0
+    CPPUNIT_ASSERT_EQUAL(expected, actual);
+  }
+
+  mgr->free_job_desc(jd);
+}
+
+static const int NS = 32768;		// number of shorts in short_buf
+static short short_buf[NS] _AL128;	// for known alignment
+
+//
+// test all "get" alignments and sizes
+//
+void
+qa_job_manager::t7_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+
+  int ea_args_maxsize = mgr->ea_args_maxsize();	// used below as a byte count
+
+  for (int i = 0; i < NS; i++)	// init buffer with known qty
+    short_buf[i] = 0x1234 + i;
+  // offset and len are both counted in shorts, not bytes
+  for (int offset = 0; offset <= 128; offset++){
+    for (int len = 0; len <= 128; len++){
+      test_sum_shorts(mgr, &short_buf[offset], len);
+    }
+  }
+
+  // confirm maximum length
+  for (int offset = 0; offset <= 64; offset++){
+    test_sum_shorts(mgr, &short_buf[offset], ea_args_maxsize/sizeof(short));	// bytes -> shorts
+  }
+}
+
+//
+// test "get" args too long
+//
+void
+qa_job_manager::t8_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_sum_shorts = mgr->lookup_proc("qa_sum_shorts");
+  // one "get" argument of 1 << 20 bytes; the job is expected to be rejected
+  init_jd(jd, gcp_qa_sum_shorts);
+  jd->eaa.nargs = 1;
+  jd->eaa.arg[0].ea_addr = 0;	// no real buffer is supplied for this request
+  jd->eaa.arg[0].direction = GCJD_DMA_GET;
+  jd->eaa.arg[0].get_size = 1 << 20;
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_ARGS_TOO_LONG, jd->status);
+  }
+
+  mgr->free_job_desc(jd);
+}
+
+//
+// test MAX_ARGS_EA "get" case
+//
+void
+qa_job_manager::t9_body()
+{
+  static const int N = 127;	// shorts covered by each EA argument
+  static const int M = 201;	// distance, in shorts, between the starts of consecutive arguments
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_sum_shorts = mgr->lookup_proc("qa_sum_shorts");
+  // fill in all MAX_ARGS_EA slots with "get" arguments into short_buf
+  init_jd(jd, gcp_qa_sum_shorts);
+  jd->eaa.nargs = MAX_ARGS_EA;
+  for (int i = 0; i < MAX_ARGS_EA; i++){
+    jd->eaa.arg[i].direction = GCJD_DMA_GET;
+    jd->eaa.arg[i].ea_addr = ptr_to_ea(&short_buf[i * M]);
+    jd->eaa.arg[i].get_size = N * sizeof(short);	// size in bytes
+  }
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd->status);
+    for (int i = 0; i < MAX_ARGS_EA; i++){	// output arg i is compared with the sum of EA arg i
+      int expected = sum_shorts(&short_buf[i * M], N);
+      int actual = jd->output.arg[i].s32;
+      CPPUNIT_ASSERT_EQUAL(expected, actual);
+    }
+  }
+
+  mgr->free_job_desc(jd);
+}
+
+// Return true iff all len bytes of buf equal v.  Every mismatching byte is printed.
+static bool confirm_const(const unsigned char *buf, size_t len, unsigned char v)
+{
+  bool ok = true;
+
+  for (size_t i = 0; i < len; i++){
+    if (buf[i] != v){
+      ok = false;
+      // i is a size_t; it was printed with %d, which is a format/argument mismatch
+      printf("confirm_const: buf[%6lu] = 0x%02x, expected = 0x%02x\n", (unsigned long) i, buf[i], v);
+    }
+  }
+
+  return ok;
+}
+
+// Return true iff buf[i] == (unsigned char)(v + i) for all i < len.  Every mismatch is printed.
+static bool confirm_seq(const unsigned char *buf, size_t len, unsigned char v)
+{
+  bool ok = true;
+
+  for (size_t i = 0; i < len; i++, v++){	// v is an unsigned char, so it wraps after 0xff
+    if (buf[i] != v){
+      ok = false;
+      // i is a size_t; it was printed with %d, which is a format/argument mismatch
+      printf("confirm_seq: buf[%6lu] = 0x%02x, expected = 0x%02x\n", (unsigned long) i, buf[i], v);
+    }
+  }
+
+  return ok;
+}
+
+static void
+test_put_seq(gc_job_manager_sptr mgr, int offset, int len, int starting_val)	// offset and len are in bytes
+{
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_put_seq = mgr->lookup_proc("qa_put_seq");
+  // reuse short_buf as a byte buffer and fill it with 0xff so stray writes are visible
+  unsigned char *buf = (unsigned char *) short_buf;
+  size_t buf_len = sizeof(short_buf);
+  memset(buf, 0xff, buf_len);
+
+  // two cache lines into the buffer, so we can check before and after
+  int fixed_offset = 256;
+  // one s32 input (the starting value) and one "put" EA argument of len bytes
+  init_jd(jd, gcp_qa_put_seq);
+  jd->input.nargs = 1;
+  jd->input.arg[0].s32 = starting_val;
+  jd->eaa.nargs = 1;
+  jd->eaa.arg[0].ea_addr = ptr_to_ea(buf + fixed_offset + offset);
+  jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+  jd->eaa.arg[0].put_size = len;
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd->status);
+    
+    // check before
+    CPPUNIT_ASSERT(confirm_const(&buf[0], fixed_offset + offset, 0xff)); 
+
+    // check sequence
+    CPPUNIT_ASSERT(confirm_seq(&buf[fixed_offset + offset], len, starting_val));	// starting_val is narrowed to unsigned char
+
+    // check after
+    CPPUNIT_ASSERT(confirm_const(&buf[fixed_offset + offset + len],
+				 buf_len - fixed_offset - offset - len, 0xff));
+  }
+  mgr->free_job_desc(jd);
+}
+
+//
+// Test all "put" alignments and sizes
+//
+void
+qa_job_manager::t10_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+
+  int starting_val = 13;	// incremented after every job so each run uses a different sequence
+  // offset and len are byte counts passed straight to test_put_seq()
+  for (int offset = 0; offset <= 128; offset++){
+    for (int len = 0; len <= 128; len++){
+      test_put_seq(mgr, offset, len, starting_val++);
+    }
+  }
+
+  int ea_args_maxsize = mgr->ea_args_maxsize();	// used below as the put length in bytes
+
+  // confirm maximum length
+  for (int offset = 0; offset <= 64; offset++){
+    test_put_seq(mgr, offset, ea_args_maxsize, starting_val++);
+  }
+}
+
+//
+// test "put" args too long
+//
+void
+qa_job_manager::t11_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_put_seq = mgr->lookup_proc("qa_put_seq");
+  // one "put" argument of 1 << 20 bytes; the job is expected to be rejected
+  init_jd(jd, gcp_qa_put_seq);
+  jd->input.nargs = 1;
+  jd->input.arg[0].s32 = 0;
+  jd->eaa.nargs = 1;
+  jd->eaa.arg[0].ea_addr = 0;	// no real buffer is supplied for this request
+  jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+  jd->eaa.arg[0].put_size = 1 << 20;
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_ARGS_TOO_LONG, jd->status);
+  }
+
+  mgr->free_job_desc(jd);
+}
+
+//
+// test MAX_ARGS_EA "put" case
+//
+void
+qa_job_manager::t12_body()
+{
+  static const int N = 127;	// bytes written through each EA argument
+  static const int M = 201;	// distance, in bytes, between the starts of consecutive arguments
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_put_seq = mgr->lookup_proc("qa_put_seq");
+  // reuse short_buf as a byte buffer, pre-filled with 0xff
+  unsigned char *buf = (unsigned char *) short_buf;
+  size_t buf_len = sizeof(short_buf);
+  memset(buf, 0xff, buf_len);
+
+  // two cache lines into the buffer, so we can check before and after
+  int fixed_offset = 256;
+
+  int starting_val = 13;
+  // one s32 input (the starting value) and MAX_ARGS_EA "put" arguments
+  init_jd(jd, gcp_qa_put_seq);
+  jd->input.nargs = 1;
+  jd->input.arg[0].s32 = starting_val;
+  jd->eaa.nargs = MAX_ARGS_EA;
+  for (int i = 0; i < MAX_ARGS_EA; i++){
+    jd->eaa.arg[i].direction = GCJD_DMA_PUT;
+    jd->eaa.arg[i].ea_addr = ptr_to_ea(&buf[i * M + fixed_offset]);
+    jd->eaa.arg[i].put_size = N;
+  }
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd->status);
+    for (int i = 0; i < MAX_ARGS_EA; i++){	// each argument must continue the sequence of the previous one
+      CPPUNIT_ASSERT(confirm_seq(&buf[i * M + fixed_offset], N, starting_val));
+      starting_val += N;
+    }
+  }
+
+  mgr->free_job_desc(jd);
+}
+
+//
+// test qa_copy primitive
+//
+void
+qa_job_manager::t13_body()
+{
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+  // lower half of short_buf holds the source pattern, upper half starts out zeroed
+  memset(short_buf, 0, sizeof(short_buf));
+  for (int i = 0; i < NS/2; i++)	// init buffer with known qty
+    short_buf[i] = 0x1234 + i;
+
+  int nshorts = NS/2;
+
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  gc_proc_id_t gcp_qa_copy = mgr->lookup_proc("qa_copy");
+
+#if 0
+  printf("gcq_qa_copy = %d\n", gcp_qa_copy);
+  std::vector<std::string> procs = mgr->proc_names();
+  for (unsigned int i = 0; i < procs.size(); ++i)
+    std::cout << procs[i] << std::endl;
+#endif
+
+  init_jd(jd, gcp_qa_copy);
+  jd->eaa.nargs = 2;
+  jd->eaa.arg[0].ea_addr = ptr_to_ea(&short_buf[nshorts]);	// arg 0: "put" into the upper half
+  jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+  jd->eaa.arg[0].put_size = nshorts * sizeof(short);
+  
+  jd->eaa.arg[1].ea_addr = ptr_to_ea(&short_buf[0]);		// arg 1: "get" from the lower half
+  jd->eaa.arg[1].direction = GCJD_DMA_GET;
+  jd->eaa.arg[1].get_size = nshorts * sizeof(short);
+  
+  // a failed submit is only reported on stdout, it is not asserted
+  if (!mgr->submit_job(jd)){
+    printf("%d: submit_job(jd) failed, status = %d\n", __LINE__, jd->status);
+  }
+  else {
+    mgr->wait_job(jd);
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd->status);
+    CPPUNIT_ASSERT_EQUAL(0, jd->output.arg[0].s32);	// output arg 0 is expected to be 0
+
+    bool ok = true;
+    for (int i = 0; i < nshorts; i++){	// upper half must now equal the lower half
+      if (short_buf[i] != short_buf[i + nshorts])
+	ok = false;
+    }
+    CPPUNIT_ASSERT(ok);
+  }
+  mgr->free_job_desc(jd);
+}
+
+/*
+ * Parallel submission of NJOBS "put" jobs will test double buffered puts.
+ */
+void
+qa_job_manager::t14_body()
+{
+  //return;
+
+  //static const int NJOBS = 64;
+  static const int NJOBS = 128;
+  static const int LEN_PER_JOB = 1021;	// bytes written by each job
+  unsigned char    buf[NJOBS * LEN_PER_JOB];	// one contiguous LEN_PER_JOB-byte slice per job, on the stack
+  gc_job_desc_t	  *jd[NJOBS];
+  bool		   done[NJOBS];
+
+  static const int STARTING_VAL = 13;
+
+  memset(buf, 0xff, LEN_PER_JOB * NJOBS);
+
+  gc_job_manager_sptr mgr;
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  mgr = gc_make_job_manager(&opts);
+
+
+  gc_proc_id_t gcp_qa_put_seq = mgr->lookup_proc("qa_put_seq");
+
+  // do all the initialization up front
+  // job i gets starting value STARTING_VAL + 3*i and writes to slice i of buf
+  for (int i = 0, val = STARTING_VAL; i < NJOBS; i++, val += 3){
+    jd[i] = mgr->alloc_job_desc();
+    init_jd(jd[i], gcp_qa_put_seq);
+    jd[i]->input.nargs = 1;
+    jd[i]->input.arg[0].s32 = val;
+    jd[i]->eaa.nargs = 1;
+    jd[i]->eaa.arg[0].ea_addr = ptr_to_ea(&buf[i * LEN_PER_JOB]);
+    jd[i]->eaa.arg[0].direction = GCJD_DMA_PUT;
+    jd[i]->eaa.arg[0].put_size = LEN_PER_JOB;
+  }
+
+  // submit them all
+  // a failed submit is only reported on stdout, it is not asserted
+  for (int i = 0; i < NJOBS; i++){
+    if (!mgr->submit_job(jd[i])){
+      printf("%d: submit_job(jd[%2d]) failed, status = %d\n", __LINE__, i, jd[i]->status);
+    }
+  }
+
+  // wait for them all
+
+  int n = mgr->wait_jobs(NJOBS, jd, done, GC_WAIT_ALL);
+  CPPUNIT_ASSERT_EQUAL(NJOBS, n);
+
+  // check results
+  // val is regenerated with the same +3 step that was used when the jobs were set up
+  for (int i = 0, val = STARTING_VAL; i < NJOBS; i++, val += 3){
+    CPPUNIT_ASSERT_EQUAL(JS_OK, jd[i]->status);
+    CPPUNIT_ASSERT(confirm_seq(&buf[i * LEN_PER_JOB], LEN_PER_JOB, val));
+  }
+  
+  // cleanup
+  for (int i = 0; i < NJOBS; i++)
+    mgr->free_job_desc(jd[i]);
+}
+
+void
+qa_job_manager::t15_body()	// exercise gc_job_manager::set_singleton() / singleton()
+{
+  gc_jm_options opts;
+  opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa_spx);
+  opts.nspes = 1;
+  gc_job_manager_sptr mgr = gc_make_job_manager(&opts);
+  // register mgr as the singleton; mgr remains the only shared pointer held by this test
+  gc_job_manager::set_singleton(mgr);
+
+  CPPUNIT_ASSERT(gc_job_manager::singleton());	// retrievable while mgr is still held
+  mgr.reset();					// drop this test's reference
+  CPPUNIT_ASSERT_THROW(gc_job_manager::singleton(), boost::bad_weak_ptr);	// must now throw
+}
diff --git a/gcell/lib/runtime/qa_job_manager.h b/gcell/lib/runtime/qa_job_manager.h
new file mode 100644
index 000000000..ab3325b73
--- /dev/null
+++ b/gcell/lib/runtime/qa_job_manager.h
@@ -0,0 +1,89 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_JOB_MANAGER_H
+#define INCLUDED_QA_JOB_MANAGER_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_job_manager;
+typedef void (qa_job_manager::*test_t)();	// pointer to a no-argument member function of the fixture
+
+// CppUnit test case for the job manager; registers tests t0 through t15.
+class qa_job_manager : public CppUnit::TestCase {
+
+  CPPUNIT_TEST_SUITE(qa_job_manager);
+  CPPUNIT_TEST(t0);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST(t2);
+  CPPUNIT_TEST(t3);
+  CPPUNIT_TEST(t4);
+  CPPUNIT_TEST(t5);
+  CPPUNIT_TEST(t6);
+  CPPUNIT_TEST(t7);
+  CPPUNIT_TEST(t8);
+  CPPUNIT_TEST(t9);
+  CPPUNIT_TEST(t10);
+  CPPUNIT_TEST(t11);
+  CPPUNIT_TEST(t12);
+  CPPUNIT_TEST(t13);
+  CPPUNIT_TEST(t14);
+  CPPUNIT_TEST(t15);
+  CPPUNIT_TEST_SUITE_END();
+
+ private:
+  void leak_check(test_t t, const std::string &name);	// NOTE(review): presumably runs t and checks for leaks -- confirm in the .cc
+  // Only the tN() functions are registered above; t1..t15 each have a matching tN_body().
+  void t0();
+  void t1();
+  void t1_body();
+  void t2();
+  void t2_body();
+  void t3();
+  void t3_body();
+  void t4();
+  void t4_body();
+  void t5();
+  void t5_body();
+  void t6();
+  void t6_body();
+  void t7();
+  void t7_body();
+  void t8();
+  void t8_body();
+  void t9();
+  void t9_body();
+  void t10();
+  void t10_body();
+  void t11();
+  void t11_body();
+  void t12();
+  void t12_body();
+  void t13();
+  void t13_body();
+  void t14();
+  void t14_body();
+  void t15();
+  void t15_body();
+
+};
+
+#endif /* INCLUDED_QA_JOB_MANAGER_H */
diff --git a/gcell/lib/runtime/spu/gc_delay.c b/gcell/lib/runtime/spu/gc_delay.c
new file mode 100644
index 000000000..4d58935aa
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_delay.c
@@ -0,0 +1,58 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/spu/gc_delay.h>
+#include <gcell/compiler.h>
+
+inline static void
+gc_udelay_1us(void)	// busy-wait helper; gc_udelay() calls it once per requested microsecond
+{
+  unsigned int i = 158;	// 158 iterations * 20 clocks = 3160 clocks (NOTE(review): ~1 us at 3.2 GHz -- confirm clock rate)
+
+  do {  // 20 clocks per iteration of the loop
+    asm ("nop $127; nop $127; nop $127; nop $127; nop $127");
+    asm ("nop $127; nop $127; nop $127; nop $127; nop $127");
+    asm ("nop $127; nop $127; nop $127; nop $127; nop $127");
+    asm ("nop $127; nop $127");
+  } while(--i != 0);
+}
+
+void
+gc_udelay(unsigned int usecs)	// busy-wait by calling gc_udelay_1us() usecs times
+{
+  unsigned int i;
+  for (i = 0; i < usecs; i++)	// usecs == 0 returns without delaying
+    gc_udelay_1us();
+}
+
+void
+gc_cdelay(unsigned int cpu_cycles)	// busy-wait for roughly cpu_cycles cycles
+{
+  if (cpu_cycles < 40)	// roughly the amount of overhead
+    return;
+  
+  cpu_cycles >>= 2;	// about 4 cycles / loop
+  // cpu_cycles is now the number of loop iterations to run
+  while (cpu_cycles-- != 0){
+    asm ("nop $127");	// keeps compiler from removing the loop
+  }
+}
+
diff --git a/gcell/lib/runtime/spu/gc_logging.c b/gcell/lib/runtime/spu/gc_logging.c
new file mode 100644
index 000000000..48b7e5f25
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_logging.c
@@ -0,0 +1,77 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_logging.h>
+#include <gcell/gc_spu_args.h>
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+
+static gc_eaddr_t     log_base_ea;	// base address of log entries in EA
+static uint32_t	      log_idx_mask;	// nentries - 1
+static uint32_t	      log_idx;		// current log entry index
+static uint32_t	      log_seqno;
+
+static int 	      log_tags;		// two consecutive tags
+static int	      tmp_buffer_busy;	// bitmask: buffer busy state
+static int	      tmp_buffer_idx;	// 0 or 1
+static gc_log_entry_t tmp_buffer[2];
+
+void
+_gc_log_init(gc_log_t info)	// reset the logging state from info and write a first entry
+{
+  spu_write_decrementer(~0);	// the decrementer is read back as the timestamp in _gc_log_write()
+
+  log_base_ea = info.base;
+  log_idx_mask = info.nentries - 1;	// used as an AND mask on log_idx: assumes nentries is a power of 2 -- TODO confirm
+  log_idx = 0;
+  log_seqno = 0;
+
+  log_tags = mfc_multi_tag_reserve(2);	// two consecutive DMA tags; the result is not checked for failure
+  tmp_buffer_busy = 0;
+  tmp_buffer_idx = 0;
+
+  gc_log_write0(GCL_SS_SYS, 0);
+}
+
+void
+_gc_log_write(gc_log_entry_t entry)	// stamp entry and DMA it to the next slot of the EA log
+{
+  if (log_base_ea == 0)	// base address is 0: nothing is written
+    return;
+
+  entry.seqno = log_seqno++;
+  entry.timestamp = spu_read_decrementer();
+
+  if (tmp_buffer_busy & (1 << tmp_buffer_idx)){	// a put was issued from this staging slot earlier
+    mfc_write_tag_mask(1 << (log_tags + tmp_buffer_idx));
+    mfc_read_tag_status_all();	// wait for that put before overwriting the slot
+  }
+
+  tmp_buffer[tmp_buffer_idx] = entry;	// save local copy
+
+  mfc_put(&tmp_buffer[tmp_buffer_idx],
+	  log_base_ea + log_idx * sizeof(entry), sizeof(entry),
+	  log_tags + tmp_buffer_idx, 0, 0);
+
+  tmp_buffer_busy |= (1 << tmp_buffer_idx);
+  tmp_buffer_idx ^= 0x1;	// alternate between the two staging slots
+  log_idx = (log_idx + 1) & log_idx_mask;	// advance and wrap the log index
+}
diff --git a/gcell/lib/runtime/spu/gc_main.c b/gcell/lib/runtime/spu/gc_main.c
new file mode 100644
index 000000000..0866c3c3d
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_main.c
@@ -0,0 +1,708 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+// #define ENABLE_GC_LOGGING 	// define to enable logging
+
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <sync_utils.h>
+#include "gc_spu_config.h"
+#include "spu_buffers.h"
+#include <gcell/gc_spu_args.h>
+#include <gcell/gc_job_desc.h>
+#include <gcell/gc_mbox.h>
+#include <gcell/gc_declare_proc.h>
+#include <gcell/spu/gc_jd_queue.h>
+#include <gcell/spu/gc_random.h>
+#include <gcell/spu/gc_delay.h>
+
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+//! round x down to p2 boundary (p2 must be a power-of-2)
+#define ROUND_DN(x, p2) ((x) & ~((p2)-1))
+
+//! round x up to p2 boundary (p2 must be a power-of-2)
+#define ROUND_UP(x, p2) (((x)+((p2)-1)) & ~((p2)-1))
+
+
+#define USE_LLR_LOST_EVENT	0	// define to 0 or 1
+
+int			gc_sys_tag;	// tag for misc DMA operations
+static gc_spu_args_t	spu_args;
+
+static struct gc_proc_def *gc_proc_def;	// procedure entry points
+
+// ------------------------------------------------------------------------
+
+// state for DMA'ing arguments in and out
+
+static int get_tag;		// 1 tag for job arg gets
+static int put_tags;		// 2 tags for job arg puts
+
+static int pb_idx = 0;		// current put buffer index (0 or 1)
+
+// bitmask (bit per put buffer): bit is set if DMA is started but not complete
+static int put_in_progress = 0;
+#define PBI_MASK(_pbi_) (1 << (_pbi_))
+
+// ------------------------------------------------------------------------
+
+// our working copy of the completion info
+static gc_comp_info_t	comp_info = {  
+  .in_use = 1,
+  .ncomplete = 0
+};
+
+static int ci_idx = 0;		// index of current comp_info
+static int ci_tags;		// two consecutive dma tags
+
+// ------------------------------------------------------------------------
+
+/*
+ * Wait until EA copy of comp_info[idx].in_use is 0
+ */
+static void
+wait_for_ppe_to_be_done_with_comp_info(int idx)
+{
+  char _tmp[256];	// oversized so a 128-byte aligned, 128-byte window fits inside it
+  char *buf = (char *) ALIGN(_tmp, 128);	// get cache-aligned buffer
+  gc_comp_info_t *p = (gc_comp_info_t *) buf;
+
+  assert(sizeof(gc_comp_info_t) == 128);	// the fixed 128-byte get below must cover the whole struct
+
+  do {
+    mfc_get(buf, spu_args.comp_info[idx], 128, gc_sys_tag, 0, 0);	// fetch the EA copy
+    mfc_write_tag_mask(1 << gc_sys_tag);
+    mfc_read_tag_status_all();	// wait for the get to complete
+    if (p->in_use == 0)
+      return;
+
+    gc_udelay(5);	// pause before polling again
+
+  } while (1);
+}
+
+static void
+flush_completion_info(void)	// DMA comp_info to the PPE, wait for pending puts, then notify the PPE
+{
+  // events: 0x3X
+
+  static int total_complete = 0;	// running total, only used in the 0x31 log record
+
+  if (comp_info.ncomplete == 0)		// nothing accumulated: no DMA, no mailbox message
+    return;
+  
+  // ensure that PPE is done with the buffer we're about to overwrite
+  wait_for_ppe_to_be_done_with_comp_info(ci_idx);
+
+  // dma the comp_info out to PPE
+  int tag = ci_tags + ci_idx;
+  mfc_put(&comp_info, spu_args.comp_info[ci_idx], sizeof(gc_comp_info_t), tag, 0, 0);
+
+  // we need to wait for the completion info to finish, as well as
+  // any EA argument puts.
+
+  int tag_mask = 1 << tag;		// the comp_info tag
+  if (put_in_progress & PBI_MASK(0))
+    tag_mask |= (1 << (put_tags + 0));
+  if (put_in_progress & PBI_MASK(1))
+    tag_mask |= (1 << (put_tags + 1));
+
+  gc_log_write2(GCL_SS_SYS, 0x30, put_in_progress, tag_mask);
+
+  mfc_write_tag_mask(tag_mask);		// the tags we're interested in
+  mfc_read_tag_status_all();		// wait for DMA to complete
+  put_in_progress = 0;			// mark them all complete
+
+  total_complete += comp_info.ncomplete;
+  gc_log_write4(GCL_SS_SYS, 0x31,
+		put_in_progress, ci_idx, comp_info.ncomplete, total_complete);	// put_in_progress is already 0 here
+
+  // send PPE a message
+  spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));
+
+  ci_idx ^= 0x1;	// switch buffers
+  comp_info.in_use = 1;	// reset the working copy for the next batch
+  comp_info.ncomplete = 0;
+}
+
+// ------------------------------------------------------------------------
+
+static unsigned int backoff;		// current backoff value in clock cycles
+static unsigned int _backoff_start;
+static unsigned int _backoff_cap;
+
+/*
+ * For 3.2 GHz SPE
+ *
+ * 12    4095 cycles    1.3 us
+ * 13    8191 cycles    2.6 us
+ * 14   16383 cycles    5.1 us
+ * 15   32767 cycles   10.2 us
+ * 16                  20.4 us
+ * 17                  40.8 us
+ * 18                  81.9 us
+ * 19                 163.8 us
+ * 20                 327.7 us
+ * 21                 655.4 us
+ */
+static unsigned char log2_backoff_start[16] = {
+// 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
+// -------------------------------------------------------------
+  12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16
+};
+  
+static unsigned char log2_backoff_cap[16] = {
+// 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
+// -------------------------------------------------------------
+  17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21
+};
+  
+// Load the starting and maximum backoff (each 2^k - 1 cycles) from the per-SPU-count tables.
+static void backoff_init(void)
+{
+  unsigned int row = (spu_args.nspus - 1) & 0xf;	// table index, kept within 0..15
+
+  _backoff_start = (1 << log2_backoff_start[row]) - 1;
+  _backoff_cap   = (1 << log2_backoff_cap[row]) - 1;
+  backoff = _backoff_start;
+}
+
+// Put the current backoff back to the starting value chosen by backoff_init().
+static void backoff_reset(void)
+{
+  backoff = _backoff_start;
+}
+
+#if 0
+// Disabled variant: plain exponential backoff with no random jitter.
+static void
+backoff_delay(void)
+{
+  gc_cdelay(backoff);
+
+  // capped exponential backoff
+  backoff = ((backoff << 1) + 1) & _backoff_cap;
+}
+
+#else
+
+#define RANDOM_WEIGHT	0.2
+// Active variant: delay for the current backoff, then grow it and apply random jitter.
+static void
+backoff_delay(void)
+{
+  gc_cdelay(backoff);
+
+  backoff = ((backoff << 1) + 1);	// double (plus one), then clamp to the cap
+  if (backoff > _backoff_cap)
+    backoff = _backoff_cap;
+  // NOTE(review): assumes gc_uniform_deviate() is in [0, 1), giving r in [-RANDOM_WEIGHT, RANDOM_WEIGHT) -- confirm
+  float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5)));
+  backoff = backoff * (1.0 + r);	// jitter is applied after the clamp, so the result may exceed the cap
+}
+
+#endif
+
+// ------------------------------------------------------------------------
+
+// Return a mask with the low nbits bits set; nbits must be >= 0 and less than the width of unsigned int.
+static inline unsigned int make_mask(int nbits)
+{
+  return ~(~0u << nbits);	// shift an unsigned value: left-shifting the negative int ~0 is undefined
+}
+
+static unsigned int   dc_work;
+static int            dc_put_tag;
+static unsigned char *dc_ls_base;
+static gc_eaddr_t     dc_ea_base;
+
+// divide and conquer
+static void
+d_and_c(unsigned int offset, unsigned int len)	// offset and len are in bytes, relative to dc_ls_base / dc_ea_base
+{
+  unsigned int mask = make_mask(len) << offset;	// one bit per byte of [offset, offset + len)
+  unsigned int t = mask & dc_work;		// the bytes of that range that still need to be written
+  if (t == 0)		// nothing to do
+    return;
+  if (t == mask){	// got a match, generate dma
+    mfc_put(dc_ls_base + offset, dc_ea_base + offset, len, dc_put_tag, 0, 0);
+  }
+  else {		// bisect
+    len >>= 1;		// only part of the range is wanted: try each half separately
+    d_and_c(offset, len);
+    d_and_c(offset + len, len);
+  }
+}
+
+// Handle the nasty case of a dma xfer that's less than 16 bytes long.
+// len is guaranteed to be in [1, 15]
+
+static void
+handle_slow_and_tedious_dma(gc_eaddr_t ea, unsigned char *ls,
+			    unsigned int len, int put_tag)
+{
+  // Set up for divide and conquer
+  unsigned int alignment = ((uintptr_t) ls) & 0x7;	// byte offset of ls within its 8-byte unit
+  dc_work = make_mask(len) << alignment;		// one bit per byte to write, relative to dc_ls_base
+  dc_ls_base = (unsigned char *) ROUND_DN((uintptr_t) ls, 8);
+  dc_ea_base = ROUND_DN(ea, (gc_eaddr_t) 8);	// NOTE(review): assumes ea and ls share the same low 3 bits -- confirm
+  dc_put_tag = put_tag;
+  // with alignment <= 7 and len <= 15, dc_work fits in bits 0..21, covered by three 8-byte windows
+  d_and_c( 0, 8);
+  d_and_c( 8, 8);
+  d_and_c(16, 8);
+}
+
+
+static void
+process_job(gc_eaddr_t jd_ea, gc_job_desc_t *jd)
+{
+  // events: 0x2X
+
+  jd->status = JS_OK;	// assume success
+
+  if (jd->proc_id >= spu_args.nproc_defs)
+    jd->status = JS_UNKNOWN_PROC;
+    
+  else {
+  
+    if (jd->eaa.nargs == 0)
+      (*gc_proc_def[jd->proc_id].proc)(&jd->input, &jd->output, &jd->eaa);
+
+    else {	// handle EA args that must be DMA'd in/out
+
+      gc_job_ea_args_t *eaa = &jd->eaa;
+
+      int NELMS =
+	MAX(MAX_ARGS_EA,
+	    (GC_SPU_BUFSIZE + MFC_MAX_DMA_SIZE - 1) / MFC_MAX_DMA_SIZE);
+
+      mfc_list_element_t  dma_get_list[NELMS];
+      //mfc_list_element_t  dma_put_list[NELMS];
+      
+      memset(dma_get_list, 0, sizeof(dma_get_list));
+      //memset(dma_put_list, 0, sizeof(dma_put_list));
+
+      int gli = 0;	// get list index
+      //int pli = 0;	// put list index
+
+      unsigned char *get_base = _gci_getbuf[0];
+      unsigned char *get_t = get_base;
+      unsigned int   total_get_dma_len = 0;
+
+      unsigned char *put_base = _gci_putbuf[pb_idx];
+      unsigned char *put_t = put_base;
+      unsigned int   total_put_alloc = 0;
+      int	     put_tag = put_tags + pb_idx;
+
+      // Do we have any "put" args?  If so ensure that previous
+      // dma from this buffer is complete
+
+      gc_log_write2(GCL_SS_SYS, 0x24, put_in_progress, jd->sys.direction_union);
+
+      if ((jd->sys.direction_union & GCJD_DMA_PUT)
+	  && (put_in_progress & PBI_MASK(pb_idx))){
+
+	gc_log_write2(GCL_SS_SYS, 0x25, put_in_progress, 1 << put_tag);
+
+	mfc_write_tag_mask(1 << put_tag);	// the tag we're interested in
+	mfc_read_tag_status_all();		// wait for DMA to complete
+	put_in_progress &= ~(PBI_MASK(pb_idx));
+
+	gc_log_write1(GCL_SS_SYS, 0x26, put_in_progress);
+      }
+
+
+      // for now, all EA's must have the same high 32-bits
+      gc_eaddr_t common_ea = eaa->arg[0].ea_addr;
+
+
+      // assign LS addresses for buffers
+      
+      for (unsigned int i = 0; i < eaa->nargs; i++){
+
+	gc_eaddr_t  	ea_base = 0;
+	unsigned char  *ls_base;
+	int	  	offset;
+	unsigned int	dma_len;
+
+	if (eaa->arg[i].direction == GCJD_DMA_GET){
+	  ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE);
+	  offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1);
+	  dma_len = ROUND_UP(eaa->arg[i].get_size + offset, CACHE_LINE_SIZE);
+	  total_get_dma_len += dma_len;
+
+	  if (total_get_dma_len > GC_SPU_BUFSIZE){
+	    jd->status = JS_ARGS_TOO_LONG;
+	    goto wrap_up;
+	  }
+
+	  ls_base = get_t;
+	  get_t += dma_len;
+	  eaa->arg[i].ls_addr = ls_base + offset;
+
+	  if (0){
+	    assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f) == ((intptr_t)eaa->arg[i].ls_addr & 0x7f));
+	    assert((ea_base & 0x7f) == 0);
+	    assert(((intptr_t)ls_base & 0x7f) == 0);
+	    assert((dma_len & 0x7f) == 0);
+	    assert((eaa->arg[i].get_size <= dma_len)
+		   && dma_len <= (eaa->arg[i].get_size + offset + CACHE_LINE_SIZE - 1));
+	  }
+
+	  // add to dma get list 
+	  // FIXME (someday) the dma list is where the JS_BAD_EAH limitation comes from
+
+	  while (dma_len != 0){
+	    int n = MIN(dma_len, MFC_MAX_DMA_SIZE);
+	    dma_get_list[gli].size = n;
+	    dma_get_list[gli].eal = mfc_ea2l(ea_base);
+	    dma_len -= n;
+	    ea_base += n;
+	    gli++;
+	  }
+	}
+
+	else if (eaa->arg[i].direction == GCJD_DMA_PUT){
+	  //
+	  // This case is trickier than the GET case since we can't
+	  // write outside of the bounds of the user provided buffer.
+	  // We still align the buffers to 128-bytes for good performance
+	  // in the middle portion of the xfers.
+	  //
+	  ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE);
+	  offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1);
+
+	  uint32_t ls_alloc_len =
+	    ROUND_UP(eaa->arg[i].put_size + offset, CACHE_LINE_SIZE);
+
+	  total_put_alloc += ls_alloc_len;
+
+	  if (total_put_alloc > GC_SPU_BUFSIZE){
+	    jd->status = JS_ARGS_TOO_LONG;
+	    goto wrap_up;
+	  }
+
+	  ls_base = put_t;
+	  put_t += ls_alloc_len;
+	  eaa->arg[i].ls_addr = ls_base + offset;
+
+	  if (1){
+	    assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f)
+		   == ((intptr_t)eaa->arg[i].ls_addr & 0x7f));
+	    assert((ea_base & 0x7f) == 0);
+	    assert(((intptr_t)ls_base & 0x7f) == 0);
+	  }
+	}
+
+	else
+	  assert(0);
+      }
+
+      // fire off the dma to fetch the args and wait for it to complete
+      mfc_getl(get_base, common_ea, dma_get_list, gli*sizeof(dma_get_list[0]), get_tag, 0, 0);
+      mfc_write_tag_mask(1 << get_tag);		// the tag we're interested in
+      mfc_read_tag_status_all();		// wait for DMA to complete
+
+      // do the work
+      (*gc_proc_def[jd->proc_id].proc)(&jd->input, &jd->output, &jd->eaa);
+
+
+      // Do we have any "put" args?  If so copy them out
+      if (jd->sys.direction_union & GCJD_DMA_PUT){
+
+	// Do the copy out using single DMA xfers.  The LS ranges
+	// aren't generally contiguous.
+	
+	bool started_dma = false;
+
+	for (unsigned int i = 0; i < eaa->nargs; i++){
+	  if (eaa->arg[i].direction == GCJD_DMA_PUT && eaa->arg[i].put_size != 0){
+	    
+	    started_dma = true;
+
+	    gc_eaddr_t       ea;
+	    unsigned char   *ls;
+	    unsigned int     len;
+
+	    ea = eaa->arg[i].ea_addr;
+	    ls = (unsigned char *) eaa->arg[i].ls_addr;
+	    len = eaa->arg[i].put_size;
+
+	    if (len < 16)
+	      handle_slow_and_tedious_dma(ea, ls, len, put_tag);
+	    
+	    else {
+	      if ((ea & 0xf) != 0){
+
+		// printf("1:  ea = 0x%x  len = %5d\n", (int) ea, len);
+		
+		// handle the "pre-multiple-of-16" portion
+		// do 1, 2, 4, or 8 byte xfers as required
+
+		if (ea & 0x1){				// do a 1-byte xfer
+		  mfc_put(ls, ea, 1, put_tag, 0, 0);
+		  ea += 1;
+		  ls += 1;
+		  len -= 1;
+		}
+		if (ea & 0x2){				// do a 2-byte xfer
+		  mfc_put(ls, ea, 2, put_tag, 0, 0);
+		  ea += 2;
+		  ls += 2;
+		  len -= 2;
+		}
+		if (ea & 0x4){				// do a 4-byte xfer
+		  mfc_put(ls, ea, 4, put_tag, 0, 0);
+		  ea += 4;
+		  ls += 4;
+		  len -= 4;
+		}
+		if (ea & 0x8){				// do an 8-byte xfer
+		  mfc_put(ls, ea, 8, put_tag, 0, 0);
+		  ea += 8;
+		  ls += 8;
+		  len -= 8;
+		}
+	      }
+
+	      if (1){
+		// printf("2:  ea = 0x%x  len = %5d\n", (int) ea, len);
+		assert((ea & 0xf) == 0);
+		assert((((intptr_t) ls) & 0xf) == 0);
+	      }
+
+	      // handle the "multiple-of-16" portion
+
+	      int aligned_len = ROUND_DN(len, 16);
+	      len = len & (16 - 1);
+
+	      while (aligned_len != 0){
+		int dma_len = MIN(aligned_len, MFC_MAX_DMA_SIZE);
+		mfc_put(ls, ea, dma_len, put_tag, 0, 0);
+		ea += dma_len;
+		ls += dma_len;
+		aligned_len -= dma_len;
+	      }
+
+	      if (1){
+		// printf("3:  ea = 0x%x  len = %5d\n", (int)ea, len);
+		assert((ea & 0xf) == 0);
+		assert((((intptr_t) ls) & 0xf) == 0);
+	      }
+
+	      // handle "post-multiple-of-16" portion
+
+	      if (len != 0){
+
+		if (len >= 8){				// do an 8-byte xfer
+		  mfc_put(ls, ea, 8, put_tag, 0, 0);
+		  ea += 8;
+		  ls += 8;
+		  len -= 8;
+		}
+		if (len >= 4){				// do a 4-byte xfer
+		  mfc_put(ls, ea, 4, put_tag, 0, 0);
+		  ea += 4;
+		  ls += 4;
+		  len -= 4;
+		}
+		if (len >= 2){				// do a 2-byte xfer
+		  mfc_put(ls, ea, 2, put_tag, 0, 0);
+		  ea += 2;
+		  ls += 2;
+		  len -= 2;
+		}
+		if (len >= 1){				// do a 1-byte xfer
+		  mfc_put(ls, ea, 1, put_tag, 0, 0);
+		  ea += 1;
+		  ls += 1;
+		  len -= 1;
+		}
+		if (1)
+		  assert(len == 0);
+	      }
+	    }
+	  }
+	}
+	if (started_dma){
+	  put_in_progress |= PBI_MASK(pb_idx);		// note it's running
+	  gc_log_write2(GCL_SS_SYS, 0x27, put_in_progress, pb_idx);
+	  pb_idx ^= 1;					// toggle current buffer
+	}
+      }
+    }
+  }
+
+ wrap_up:;	// semicolon creates null statement for C99 compliance
+
+  // Copy job descriptor back out to EA.
+  // (The dma will be waited on in flush_completion_info)
+  int tag = ci_tags + ci_idx;			// use the current completion tag
+  mfc_put(jd, jd_ea, sizeof(*jd), tag, 0, 0);
+
+  // Tell PPE we're done with the job.
+  //
+  // We queue these up until we run out of room, or until we can send
+  // the info to the PPE w/o blocking.  The blocking check is in
+  // main_loop
+
+  comp_info.job_id[comp_info.ncomplete++] = jd->sys.job_id;
+
+  if (comp_info.ncomplete == GC_CI_NJOBS){
+    gc_log_write0(GCL_SS_SYS, 0x28);
+    flush_completion_info();
+  }
+}
+
+static void
+main_loop(void)
+{
+  // events: 0x1X
+  // Service loop: run queued jobs, answer mailbox requests and report completions until OP_EXIT.
+  static gc_job_desc_t	jd;	// static gets us proper alignment
+  gc_eaddr_t		jd_ea;
+  int			total_jobs = 0;	// only incremented and logged in the polling (#else) build
+
+#if (USE_LLR_LOST_EVENT)
+  // setup events
+  spu_writech(SPU_WrEventMask, MFC_LLR_LOST_EVENT);
+
+  // prime the pump
+  while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
+    process_job(jd_ea, &jd);
+  // we're now holding a lock-line reservation
+#endif
+
+  while (1){
+
+#if (USE_LLR_LOST_EVENT)
+
+    if (unlikely(spu_readchcnt(SPU_RdEventStat))){	// non-zero channel count: an event is pending
+      //
+      // execute standard event handling prologue
+      //
+      int status = spu_readch(SPU_RdEventStat);
+      int mask = spu_readch(SPU_RdEventMask);
+      spu_writech(SPU_WrEventMask, mask & ~status);	// disable active events
+      spu_writech(SPU_WrEventAck, status);		// ack active events
+
+      // execute per-event actions
+
+      if (status & MFC_LLR_LOST_EVENT){
+	//
+	// We've lost a line reservation.  This is most likely caused
+	// by somebody doing something to the queue.  Go look and see
+	// if there's anything for us.
+	//
+	while (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
+	  process_job(jd_ea, &jd);	// drain: keep going until a dequeue attempt returns false
+      }
+
+      //
+      // execute standard event handling epilogue
+      //
+      spu_writech(SPU_WrEventMask, mask);	// restore event mask
+    }
+
+#else
+
+    // try to get a job from the job queue 
+    if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)){
+      total_jobs++;
+      gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);	// event 0x10: logged before the job runs
+
+      process_job(jd_ea, &jd); 
+
+      gc_log_write2(GCL_SS_SYS, 0x11, jd.sys.job_id, total_jobs);	// event 0x11: logged after process_job returns
+      backoff_reset(); 
+    }
+    else
+      backoff_delay();	// nothing was dequeued on this pass
+
+#endif
+
+    // any msgs for us?
+
+    if (unlikely(spu_readchcnt(SPU_RdInMbox))){
+      int msg = spu_readch(SPU_RdInMbox);
+      // printf("spu[%d] mbox_msg: 0x%08x\n", spu_args.spu_idx, msg);
+      if (MBOX_MSG_OP(msg) == OP_EXIT){
+	flush_completion_info();	// flush completion info one last time before leaving the loop
+	return;
+      }
+      if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){	// reply carries GC_SPU_BUFSIZE_BASE, not GC_SPU_BUFSIZE
+	spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_SPU_BUFSIZE, GC_SPU_BUFSIZE_BASE));
+      }
+    }
+
+    // If we've got job completion info for the PPE and we can send a
+    // message without blocking, do it.
+
+    if (comp_info.ncomplete != 0 && spu_readchcnt(SPU_WrOutIntrMbox) != 0){
+      gc_log_write0(GCL_SS_SYS, 0x12);	// event 0x12: opportunistic flush of queued completions
+      flush_completion_info();
+    }
+  }
+}
+
+// SPE entry point: reserve MFC tags, DMA in spu_args from argp, initialize subsystems, run main_loop().
+int
+main(unsigned long long spe_id __attribute__((unused)),
+     unsigned long long argp,
+     unsigned long long envp __attribute__((unused)))
+{
+  gc_sys_tag = mfc_tag_reserve();	// allocate a tag for our misc DMA operations
+  get_tag  = mfc_tag_reserve();		// NOTE(review): none of these reserve calls is checked for failure
+  ci_tags  = mfc_multi_tag_reserve(2);	// base of a 2-tag group, used elsewhere as ci_tags + ci_idx
+  put_tags = mfc_multi_tag_reserve(2);	// base of a second 2-tag group
+
+#if 0  
+  printf("gc_sys_tag = %d\n", gc_sys_tag);
+  printf("get_tag    = %d\n", get_tag);
+  printf("ci_tags    = %d\n", ci_tags);
+  printf("put_tags   = %d\n", put_tags);
+#endif
+
+  // dma the args in
+  mfc_get(&spu_args, argp, sizeof(spu_args), gc_sys_tag, 0, 0);	// argp is the EA of the argument block
+  mfc_write_tag_mask(1 << gc_sys_tag);	// the tag we're interested in
+  mfc_read_tag_status_all();		// wait for DMA to complete
+
+  // initialize pointer to procedure entry table
+  gc_proc_def = (gc_proc_def_t *) spu_args.proc_def_ls_addr;
+
+  gc_set_seed(spu_args.spu_idx);	// seed the generator with this SPE's index
+
+  // initialize logging
+  _gc_log_init(spu_args.log);
+
+  backoff_init();		// initialize backoff parameters
+
+  main_loop();			// returns only after an OP_EXIT mailbox message
+  return 0;
+}
diff --git a/gcell/lib/runtime/spu/gc_random.c b/gcell/lib/runtime/spu/gc_random.c
new file mode 100644
index 000000000..214309b53
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_random.c
@@ -0,0 +1,40 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <gcell/spu/gc_random.h>
+
+static unsigned int last_val = 0;	// generator state, always in [0, M); unsigned so the update cannot overflow
+
+#define	M  714025	// values from Numerical Recipes in C, 1988
+#define A    4096	// note: (M-1)*A + C = 2924793193 exceeds INT_MAX (2147483647)
+#define C  150889	// but fits in 32 unsigned bits, so the math must be unsigned
+
+void 
+gc_set_seed(int seed)
+{
+  last_val = ((unsigned int) seed) % M;		// reduce the seed into [0, M)
+}
+
+float
+gc_uniform_deviate(void)
+{
+  last_val = (last_val * A + C) % M;		// unsigned arithmetic: no signed overflow, result never negative
+  return (float) last_val / (float) M;		// scale the state by 1/M
+}
diff --git a/gcell/lib/runtime/spu/gc_spu_config.h b/gcell/lib/runtime/spu/gc_spu_config.h
new file mode 100644
index 000000000..d0b131e82
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_spu_config.h
@@ -0,0 +1,33 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_GC_SPU_CONFIG_H
+#define INCLUDED_GCELL_GC_SPU_CONFIG_H
+
+#include <gcell/gc_job_desc.h>
+
+#define CACHE_LINE_SIZE	     128	      // in bytes
+#define	GC_SPU_BUFSIZE_BASE  (40 * 1024)      //  must be multiple of CACHE_LINE_SIZE
+#define	GC_SPU_BUFSIZE (GC_SPU_BUFSIZE_BASE + MAX_ARGS_EA * CACHE_LINE_SIZE)
+
+#define NGETBUFS	1	// single buffer job arg gets
+#define	NPUTBUFS	2	// double buffer job arg puts
+
+#endif /* INCLUDED_GCELL_GC_SPU_CONFIG_H */
diff --git a/gcell/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/lib/runtime/spu/gc_spu_jd_queue.c
new file mode 100644
index 000000000..42deac34e
--- /dev/null
+++ b/gcell/lib/runtime/spu/gc_spu_jd_queue.c
@@ -0,0 +1,112 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/spu/gc_jd_queue.h>
+#include <gcell/spu/gc_delay.h>
+#include <gcell/spu/gc_random.h>
+#include "mutex_lock.h"
+#include "mutex_unlock.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+extern int gc_sys_tag;
+
+#define	INITIAL_BACKOFF	   32.0
+#define MAX_BACKOFF	16384.0
+#define	RANDOM_WEIGHT	    0.2
+
+static float
+next_backoff(float backoff)
+{
+  // Exponential backoff with random jitter: double the previous delay,
+  // clamp it to MAX_BACKOFF, then scale by (1 + jitter).
+  float doubled = backoff * 2.0;
+  float capped = (doubled > MAX_BACKOFF) ? MAX_BACKOFF : doubled;
+
+  // jitter lies within +/- RANDOM_WEIGHT when the deviate is in [0, 1)
+  float jitter = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5)));
+  float next = capped * (1.0 + jitter);
+  return next;
+}
+
+bool
+gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
+		    int jd_tag, gc_job_desc_t *item)
+{	// Pops the head job descriptor of the queue at EA q into *item (its EA into *item_ea); false if locked or empty.
+  int	status;					// result read from MFC_RdAtomicStat after the putllc; non-zero => retry
+  char 	_tmp[256];				// twice the 128-byte line so an aligned 128-byte region fits inside
+  gc_jd_queue_t *local_q =
+    (gc_jd_queue_t *) ALIGN(_tmp, 128);		// get cache-aligned buffer
+  
+  float backoff = next_backoff(INITIAL_BACKOFF);	// computed on every call, even when we return early below
+
+  do {
+    // Copy the queue structure in and get a lock line reservation.
+    // (The structure is 128-byte aligned and completely fills a cache-line)
+
+    mfc_getllar(local_q, q, 0, 0);
+    spu_readch(MFC_RdAtomicStat);
+
+    if (local_q->mutex != 0)		// somebody else has it locked
+      return false;
+
+    if (local_q->head == 0)		// the queue is empty
+      return false;
+
+    // Try to acquire the lock
+
+    local_q->mutex = 1;
+    mfc_putllc(local_q, q, 0, 0);
+    status = spu_readch(MFC_RdAtomicStat);
+
+    if (status != 0){			// the conditional put did not succeed
+      gc_cdelay((int) backoff);		// delay before re-reading the queue
+      backoff = next_backoff(backoff);
+    }
+
+  } while (status != 0);
+
+  // we're now holding the lock
+    
+  // copy in job descriptor at head of queue
+  *item_ea = local_q->head;
+  
+  // We must use the fence with the jd_tag to ensure that any
+  // previously initiated put of a job desc is locally ordered before
+  // the get of the new one.
+  mfc_getf(item, local_q->head, sizeof(gc_job_desc_t), jd_tag, 0, 0);
+  mfc_write_tag_mask(1 << jd_tag);	// the tag we're interested in
+  mfc_read_tag_status_all();		// wait for DMA to complete
+
+  local_q->head = item->sys.next;	// advance head to the descriptor's successor
+  item->sys.next = 0;			// detach the dequeued descriptor from the list
+  if (local_q->head == 0)		// now empty?
+    local_q->tail = 0;
+
+  // Copy the queue struct back out and unlock the mutex in one fell swoop.
+  // We use the unconditional put since it's faster and we own the lock.
+
+  local_q->mutex = 0;
+  mfc_putlluc(local_q, q, 0, 0);
+  spu_readch(MFC_RdAtomicStat);
+
+  return true;
+}
diff --git a/gcell/lib/runtime/spu/gcell_runtime_qa.c b/gcell/lib/runtime/spu/gcell_runtime_qa.c
new file mode 100644
index 000000000..47a4b5b5b
--- /dev/null
+++ b/gcell/lib/runtime/spu/gcell_runtime_qa.c
@@ -0,0 +1,105 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_declare_proc.h>
+#include <gcell/spu/gc_delay.h>
+#include <string.h>
+
+// qa_nop: job procedure that does nothing; all three arguments are unused.
+static void
+qa_nop(const gc_job_direct_args_t *input _UNUSED,
+       gc_job_direct_args_t *output _UNUSED,
+       const gc_job_ea_args_t *eaa _UNUSED)
+{
+}
+
+GC_DECLARE_PROC(qa_nop, "qa_nop");
+
+static int
+sum_shorts(short *p, int nshorts)
+{
+  int acc = 0;
+  for (; nshorts > 0; nshorts--)	// add up the nshorts values starting at p
+    acc += *p++;
+
+  return acc;
+}
+
+static void
+qa_sum_shorts(const gc_job_direct_args_t *input _UNUSED,
+	      gc_job_direct_args_t *output,
+	      const gc_job_ea_args_t *eaa)
+{
+  // Store the sum of each EA argument's shorts in the matching direct output slot.
+  for (unsigned int k = 0; k < eaa->nargs; k++){
+    short *samples = eaa->arg[k].ls_addr;
+    int count = eaa->arg[k].get_size / sizeof(short);
+    output->arg[k].s32 = sum_shorts(samples, count);
+  }
+}
+
+GC_DECLARE_PROC(qa_sum_shorts, "qa_sum_shorts");
+
+static void
+write_seq(unsigned char *p, int nbytes, int counter)
+{
+  for (; nbytes > 0; nbytes--)		// store successive counter values, truncated to a byte
+    *p++ = counter++;
+}
+
+static void
+qa_put_seq(const gc_job_direct_args_t *input,
+	   gc_job_direct_args_t *output _UNUSED,
+	   const gc_job_ea_args_t *eaa)
+{
+  // Fill every EA argument's LS buffer with one continuous byte sequence
+  // whose starting value comes from input->arg[0].s32.
+  int next_value = input->arg[0].s32;
+  for (unsigned int k = 0; k < eaa->nargs; k++){
+    int nbytes = eaa->arg[k].put_size;
+    write_seq(eaa->arg[k].ls_addr, nbytes, next_value);
+    next_value += nbytes;
+  }
+}
+
+GC_DECLARE_PROC(qa_put_seq, "qa_put_seq");
+
+static void
+qa_copy(const gc_job_direct_args_t *input _UNUSED,
+	gc_job_direct_args_t *output,
+	const gc_job_ea_args_t *eaa)
+{
+  // Expects exactly two EA args: arg[0] is the PUT destination and arg[1] the
+  // GET source.  Reports 0 in output->arg[0].s32 on success, -1 otherwise.
+  int args_ok = (eaa->nargs == 2
+		 && eaa->arg[0].direction == GCJD_DMA_PUT
+		 && eaa->arg[1].direction == GCJD_DMA_GET);
+  output->arg[0].s32 = args_ok ? 0 : -1;
+  if (!args_ok)
+    return;
+
+  unsigned nbytes = eaa->arg[0].put_size;
+  if (eaa->arg[1].get_size < nbytes)	// clamp to the smaller of the two sizes
+    nbytes = eaa->arg[1].get_size;
+  memcpy(eaa->arg[0].ls_addr, eaa->arg[1].ls_addr, nbytes);
+}
+
+GC_DECLARE_PROC(qa_copy, "qa_copy");
diff --git a/gcell/lib/runtime/spu/spu_buffers.c b/gcell/lib/runtime/spu/spu_buffers.c
new file mode 100644
index 000000000..29ae99d9f
--- /dev/null
+++ b/gcell/lib/runtime/spu/spu_buffers.c
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "spu_buffers.h"
+#include <gcell/compiler.h>
+
+static unsigned char _getbuf[NGETBUFS][GC_SPU_BUFSIZE] _AL128;	// storage behind _gci_getbuf; _AL128 presumably forces 128-byte alignment
+static unsigned char _putbuf[NPUTBUFS][GC_SPU_BUFSIZE] _AL128;	// storage behind _gci_putbuf
+
+unsigned char *_gci_getbuf[NGETBUFS] = {	// NOTE(review): initializer lists one entry; keep in sync with NGETBUFS
+  _getbuf[0]
+};
+
+unsigned char *_gci_putbuf[NPUTBUFS] = {	// NOTE(review): initializer lists two entries; keep in sync with NPUTBUFS
+  _putbuf[0],
+  _putbuf[1]
+};
diff --git a/gcell/lib/runtime/spu/spu_buffers.h b/gcell/lib/runtime/spu/spu_buffers.h
new file mode 100644
index 000000000..24811dc38
--- /dev/null
+++ b/gcell/lib/runtime/spu/spu_buffers.h
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_SPU_BUFFERS_H
+#define INCLUDED_SPU_BUFFERS_H
+
+#include "gc_spu_config.h"
+
+//! pointer to input buffer
+extern unsigned char *_gci_getbuf[NGETBUFS];
+
+//! pointers to output buffers
+extern unsigned char *_gci_putbuf[NPUTBUFS];
+
+#endif /* INCLUDED_SPU_BUFFERS_H */
diff --git a/gcell/lib/spu/Makefile.am b/gcell/lib/spu/Makefile.am
new file mode 100644
index 000000000..5afa40e36
--- /dev/null
+++ b/gcell/lib/spu/Makefile.am
@@ -0,0 +1,138 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+# We're building a single .a file from files in several
+# subdirectories.  We use the "single Makefile, multiple subdirectory"
+# automake alternative. We're doing this because we're faking out
+# automake and getting it to build for 2 architectures at the same
+# time, the PPE (powerpc64) and the SPE.  The easiest way to handle
+# the SPE was to just build a static library using automake's built in
+# rules, since trying to get libtool to handle two architectures in
+# the same tree seemed untenable.
+
+include $(top_srcdir)/Makefile.common.spu
+
+IBM_SPU_SYNC_INCLUDES = -I$(top_srcdir)/gcell/ibm/sync/spu_source
+AM_CPPFLAGS = $(GCELL_SPU_INCLUDES) $(IBM_SPU_SYNC_INCLUDES)
+
+libspu_LIBRARIES = libgcell_spu.a
+
+# ----------------------------------------------------------------
+# files in the lib/runtime/spu directory
+
+runtime_srcdir = $(srcdir)/../runtime/spu
+
+runtime_spu_sources = \
+	$(runtime_srcdir)/gc_delay.c \
+	$(runtime_srcdir)/gc_spu_jd_queue.c \
+	$(runtime_srcdir)/spu_buffers.c \
+	$(runtime_srcdir)/gc_logging.c \
+	$(runtime_srcdir)/gc_main.c \
+	$(runtime_srcdir)/gc_random.c
+
+
+runtime_spu_headers =
+
+runtime_spu_noinst_headers = \
+	$(runtime_srcdir)/gc_spu_config.h \
+	$(runtime_srcdir)/spu_buffers.h 
+
+# ----------------------------------------------------------------
+# files in the lib/general/spu directory
+
+general_srcdir = $(srcdir)/../general/spu
+
+general_spu_sources = \
+	$(general_srcdir)/fft_1d_r2.c \
+	$(general_srcdir)/memset.S
+
+general_spu_headers =
+# These now live in include/gcell/spu
+#general_spu_headers = \
+#	$(general_srcdir)/gc_spu_macs.h \
+#	$(general_srcdir)/libfft.h
+
+general_spu_noinst_headers =
+# As do these
+#general_spu_noinst_headers = \
+#	$(general_srcdir)/fft_1d.h \
+#	$(general_srcdir)/fft_1d_r2.h
+
+# The QA code for (usually) non-PPE visible support routines in lib/general/spu
+general_spu_qa_sources = \
+	$(general_srcdir)/qa_memset.c
+
+# ----------------------------------------------------------------
+# files in the lib/wrapper/spu directory
+
+wrapper_srcdir = $(srcdir)/../wrapper/spu
+
+wrapper_spu_sources = \
+	$(wrapper_srcdir)/gcs_fft_1d_r2.c
+
+wrapper_spu_headers =
+
+wrapper_spu_noinst_headers =
+
+# ----------------------------------------------------------------
+# build the library from the files in the three directories
+
+libgcell_spu_a_SOURCES = \
+	$(runtime_spu_sources) \
+	$(general_spu_sources) \
+	$(wrapper_spu_sources)
+
+gcellspuinclude_HEADERS = \
+	$(runtime_spu_headers) \
+	$(general_spu_headers) \
+	$(wrapper_spu_headers)
+
+noinst_HEADERS = \
+	$(runtime_spu_noinst_headers) \
+	$(general_spu_noinst_headers) \
+	$(wrapper_spu_noinst_headers)
+
+# ----------------------------------------------------------------
+# build some SPU executables
+
+libspu_PROGRAMS = \
+	gcell_all
+
+noinst_PROGRAMS = \
+	gcell_runtime_qa \
+	gcell_general_qa
+
+#
+# All known non-QA gcell procs (at least until they get too big).
+#
+gcell_all_SOURCES = $(wrapper_spu_sources)
+gcell_all_LDADD = libgcell_spu.a
+
+#
+# The QA code required for testing the runtime.
+#
+gcell_runtime_qa_SOURCES = $(runtime_srcdir)/gcell_runtime_qa.c
+gcell_runtime_qa_LDADD = libgcell_spu.a
+
+#
+# The QA code required for testing the SPE support routines in lib/general/spu
+#
+gcell_general_qa_SOURCES = $(general_spu_qa_sources)
+gcell_general_qa_LDADD = libgcell_spu.a
diff --git a/gcell/lib/wrapper/Makefile.am b/gcell/lib/wrapper/Makefile.am
new file mode 100644
index 000000000..5a8e328c2
--- /dev/null
+++ b/gcell/lib/wrapper/Makefile.am
@@ -0,0 +1,71 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+AM_CPPFLAGS = $(DEFINES) $(GCELL_INCLUDES) $(FFTW3F_CFLAGS) $(WITH_INCLUDES)
+
+noinst_LTLIBRARIES = libwrapper.la libwrapper-qa.la
+
+#
+# generate libtool.lo's that contain embedded SPU executables
+#
+gcell_all.lo: ../spu/gcell_all
+	$(GCELL_EMBEDSPU_LIBTOOL) $< $@ 
+
+gcell_general_qa.lo: ../spu/gcell_general_qa
+	$(GCELL_EMBEDSPU_LIBTOOL) $< $@
+
+
+
+# The primary library
+
+libwrapper_la_SOURCES = \
+	gcp_fft_1d_r2.cc
+
+libwrapper_la_LIBADD = \
+	gcell_all.lo
+
+
+# The QA library
+
+libwrapper_qa_la_SOURCES = \
+	qa_gcell_general.cc \
+	qa_gcell_wrapper.cc \
+	qa_gcp_fft_1d_r2.cc
+
+libwrapper_qa_la_LIBADD = \
+	gcell_general_qa.lo \
+	-lfftw3f
+
+# Headers
+
+# Moved to include/gcell
+#gcellinclude_HEADERS = \
+#	gcp_fft_1d_r2.h
+
+noinst_HEADERS = \
+	qa_gcell_general.h \
+	qa_gcell_wrapper.h \
+	qa_gcp_fft_1d_r2.h
+
+
+CLEANFILES = gcell_all.lo gcell_general_qa.lo
+
diff --git a/gcell/lib/wrapper/gcp_fft_1d_r2.cc b/gcell/lib/wrapper/gcp_fft_1d_r2.cc
new file mode 100644
index 000000000..9e7e00f49
--- /dev/null
+++ b/gcell/lib/wrapper/gcp_fft_1d_r2.cc
@@ -0,0 +1,119 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <gcell/gcp_fft_1d_r2.h>
+#include <stdint.h>
+#include <stdexcept>
+#include <math.h>
+
+static void
+init_jd(gc_job_desc *jd,
+	gc_proc_id_t proc_id,
+	unsigned log2_fft_length,
+	bool shift,
+	std::complex<float> *out,
+	const std::complex<float> *in,
+	const std::complex<float> *twiddle,
+	const float *window)
+{
+  // Fills in *jd for one FFT job: 2 direct inputs, no direct outputs, and
+  // 4 EA args laid out as [0]=out (PUT), [1]=in, [2]=twiddle, [3]=window (GETs).
+  unsigned int fft_length = 1 << log2_fft_length;
+
+  jd->proc_id = proc_id;
+  // direct args
+  jd->input.nargs = 2;
+  jd->input.arg[0].u32 = log2_fft_length;
+  jd->input.arg[1].u32 = shift;
+  jd->output.nargs = 0;
+
+  // EA args: where each buffer lives and which way it moves
+  jd->eaa.nargs = 4;
+  jd->eaa.arg[0].ea_addr = ptr_to_ea(out);
+  jd->eaa.arg[1].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(in));
+  jd->eaa.arg[2].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(twiddle));
+  jd->eaa.arg[3].ea_addr = ptr_to_ea(const_cast<float*>(window));
+  jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+  jd->eaa.arg[1].direction = GCJD_DMA_GET;
+  jd->eaa.arg[2].direction = GCJD_DMA_GET;
+  jd->eaa.arg[3].direction = GCJD_DMA_GET;
+
+  // transfer sizes in bytes; a null window means nothing is fetched for arg 3
+  jd->eaa.arg[0].put_size = sizeof(std::complex<float>) * fft_length;
+  jd->eaa.arg[1].get_size = sizeof(std::complex<float>) * fft_length;
+  jd->eaa.arg[2].get_size = sizeof(std::complex<float>) * fft_length / 4;
+  jd->eaa.arg[3].get_size = (window == 0) ? 0 : sizeof(float) * fft_length;
+}
+
+  
+gc_job_desc_sptr
+gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
+		     unsigned int log2_fft_length,
+		     bool forward,
+		     bool shift,
+		     std::complex<float> *out,
+		     const std::complex<float> *in,
+		     const std::complex<float> *twiddle,
+		     const float *window)
+{	// Submits one forward/inverse FFT job to mgr and returns its descriptor.
+  // Validate the exponent itself: 1 << log2_fft_length is undefined once it reaches the int width.
+  if (log2_fft_length > 12)			// i.e. fft_length > 4096
+    throw std::invalid_argument("fft_length > 4096");
+  // Every buffer must be 16-byte aligned (a null window trivially passes).
+  if ((intptr_t)out & 0xf)
+    throw gc_bad_align("out");
+  if ((intptr_t)in & 0xf)
+    throw gc_bad_align("in");
+  if ((intptr_t)twiddle & 0xf)
+    throw gc_bad_align("twiddle");
+  if ((intptr_t)window & 0xf)
+    throw gc_bad_align("window");
+  // The direction selects which SPE procedure name is looked up.
+  std::string proc_name;
+  if (forward)
+    proc_name = "fwd_fft_1d_r2";
+  else
+    proc_name = "inv_fft_1d_r2";
+  // Build the descriptor and submit it; a refused submit throws gc_bad_submit with the job status.
+  gc_proc_id_t fft_id = mgr->lookup_proc(proc_name);
+  gc_job_desc_sptr jd = gc_job_manager::alloc_job_desc(mgr);
+  init_jd(jd.get(), fft_id, log2_fft_length, shift, out, in, twiddle, window);
+  if (!mgr->submit_job(jd.get())){
+    gc_job_status_t s = jd->status;
+    throw gc_bad_submit(proc_name, s);
+  }
+  return jd;
+}
+
+void
+gcp_fft_1d_r2_twiddle(unsigned int log2_fft_length, std::complex<float> *twiddle)
+{
+  // Fills twiddle[0 .. n/4-1] with (cos(2*pi*k/n), -cos(2*pi*(n/4-k)/n)).  Whole
+  // elements are assigned because std::complex::real()/imag() return by value
+  // in C++11 and later, so they cannot be used as assignment targets.
+  unsigned int n = 1 << log2_fft_length;
+  twiddle[0] = std::complex<float>(1.0, 0.0);
+  for (unsigned k = 1; k < n/4; k++){	// imaginary part mirrors the real part of element n/4 - k
+    twiddle[k] = std::complex<float>(cos(k * 2*M_PI/n), -cos((n/4 - k) * 2*M_PI/n));
+  }
+}
diff --git a/gcell/lib/wrapper/qa_gcell_general.cc b/gcell/lib/wrapper/qa_gcell_general.cc
new file mode 100644
index 000000000..af9719812
--- /dev/null
+++ b/gcell/lib/wrapper/qa_gcell_general.cc
@@ -0,0 +1,83 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qa_gcell_general.h"
+#include <cppunit/TestAssert.h>
+
+#include <stdio.h>
+#include <stdlib.h>	// random, posix_memalign
+#include <algorithm>
+#include <string.h>
+#include <gcell/gc_job_manager.h>
+
+
+// handle to embedded SPU executable
+extern spe_program_handle_t gcell_general_qa_spx;
+
+gc_job_desc_sptr
+gcp_qa_general_submit(gc_job_manager_sptr mgr, const std::string &proc_name)
+{
+  // Submit a job for the named QA proc: no inputs, no EA args, one direct output.
+  // Throws gc_bad_submit carrying the job status when submit_job() reports failure.
+  gc_proc_id_t id = mgr->lookup_proc(proc_name);	// lookup happens before the descriptor is allocated
+  gc_job_desc_sptr job = gc_job_manager::alloc_job_desc(mgr);
+  job->proc_id = id;
+  job->input.nargs = 0;
+  job->eaa.nargs = 0;
+  job->output.nargs = 1;
+
+  if (mgr->submit_job(job.get()))
+    return job;
+  gc_job_status_t failure = job->status;
+  throw gc_bad_submit(proc_name, failure);
+}
+
+
+bool
+qa_gcell_general::generic_test_body(const std::string &proc_name)
+{
+  // Run proc_name once on a one-SPE job manager built from the embedded
+  // gcell_general_qa program; the job's first direct output is the verdict.
+  gc_jm_options jm_opts;
+  jm_opts.nspes = 1;
+  jm_opts.program_handle = gc_program_handle_from_address(&gcell_general_qa_spx);
+  gc_job_manager_sptr mgr = gc_make_job_manager(&jm_opts);
+  gc_job_desc_sptr job = gcp_qa_general_submit(mgr, proc_name);
+  bool finished = mgr->wait_job(job.get());
+  if (!finished){
+    fprintf(stderr, "wait_job for %s failed: %s\n",
+	    proc_name.c_str(), gc_job_status_string(job->status).c_str());
+    CPPUNIT_ASSERT(0);
+  }
+  return job->output.arg[0].u32;	// nonzero => true
+}
+
+/*
+ * ------------------------------------------------------------------------
+ *		    Add more calls to SPE QA code here...
+ * ------------------------------------------------------------------------
+ */
+void
+qa_gcell_general::test_memset()
+{
+  CPPUNIT_ASSERT(generic_test_body("qa_memset"));	// passes iff the "qa_memset" proc reports a nonzero result
+}
+
diff --git a/gcell/lib/wrapper/qa_gcell_general.h b/gcell/lib/wrapper/qa_gcell_general.h
new file mode 100644
index 000000000..f1e64e717
--- /dev/null
+++ b/gcell/lib/wrapper/qa_gcell_general.h
@@ -0,0 +1,40 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_QA_GCELL_GENERAL_H
+#define INCLUDED_QA_GCELL_GENERAL_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_gcell_general : public CppUnit::TestCase {
+  // CppUnit fixture for the general-purpose SPE QA procedures.
+  CPPUNIT_TEST_SUITE(qa_gcell_general);
+  CPPUNIT_TEST(test_memset);
+  CPPUNIT_TEST_SUITE_END();
+
+ private:
+  void test_memset();		// runs the "qa_memset" SPE procedure
+  // Submits the named SPE procedure, waits for it, returns its boolean result.
+  bool generic_test_body(const std::string &proc_name);
+};
+
+#endif /* INCLUDED_QA_GCELL_GENERAL_H */
diff --git a/gcell/lib/wrapper/qa_gcell_wrapper.cc b/gcell/lib/wrapper/qa_gcell_wrapper.cc
new file mode 100644
index 000000000..d53c61057
--- /dev/null
+++ b/gcell/lib/wrapper/qa_gcell_wrapper.cc
@@ -0,0 +1,41 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * This class gathers together all the test cases for the lib/wrapper
+ * directory into a single test suite.  As you create new test cases,
+ * add them here.
+ */
+
+#include <qa_gcell_wrapper.h>
+#include <qa_gcell_general.h>
+#include <qa_gcp_fft_1d_r2.h>
+
+// Build the "wrapper" suite.  The caller receives a heap-allocated suite that
+// contains the qa_gcell_general and qa_gcp_fft_1d_r2 suites, in that order.
+CppUnit::TestSuite *
+qa_gcell_wrapper::suite()
+{
+  CppUnit::TestSuite *wrapper_tests = new CppUnit::TestSuite("wrapper");
+  wrapper_tests->addTest(qa_gcell_general::suite());
+  wrapper_tests->addTest(qa_gcp_fft_1d_r2::suite());
+  return wrapper_tests;
+}
diff --git a/gcell/lib/wrapper/qa_gcell_wrapper.h b/gcell/lib/wrapper/qa_gcell_wrapper.h
new file mode 100644
index 000000000..cb29db883
--- /dev/null
+++ b/gcell/lib/wrapper/qa_gcell_wrapper.h
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_GCELL_WRAPPER_H
+#define INCLUDED_QA_GCELL_WRAPPER_H
+
+#include <cppunit/TestSuite.h>
+
+//! Collects all the CppUnit tests for the lib/wrapper directory.
+//! It declares no test cases of its own, only the static suite() factory.
+class qa_gcell_wrapper {
+public:
+  //! Return a newly allocated suite holding every wrapper test suite.
+  static CppUnit::TestSuite *suite();
+};
+
+
+#endif /* INCLUDED_QA_GCELL_WRAPPER_H */
diff --git a/gcell/lib/wrapper/qa_gcp_fft_1d_r2.cc b/gcell/lib/wrapper/qa_gcp_fft_1d_r2.cc
new file mode 100644
index 000000000..742c624dc
--- /dev/null
+++ b/gcell/lib/wrapper/qa_gcp_fft_1d_r2.cc
@@ -0,0 +1,208 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qa_gcp_fft_1d_r2.h"
+#include <cppunit/TestAssert.h>
+#include <gcell/gcp_fft_1d_r2.h>
+#include <fftw3.h>
+#include <stdio.h>
+#include <stdlib.h>	// random, posix_memalign
+#include <algorithm>
+#include <string.h>
+
+typedef boost::shared_ptr<void> void_sptr;
+
+// handle to embedded SPU executable
+extern spe_program_handle_t gcell_all_spx;
+
+/*
+ * Return `size` zero-filled bytes aligned to `alignment` (default 128); release
+ * with free().  Throws std::runtime_error on failure.  Not named aligned_alloc
+ * because C11 <stdlib.h> declares an extern aligned_alloc(alignment, size); a
+ * static function with the same parameter types would conflict with it.
+ */
+static void *
+qa_aligned_alloc(size_t size, size_t alignment = 128)
+{
+  void *p = 0;
+  int rc = posix_memalign(&p, alignment, size);	// returns an error number
+  if (rc != 0){
+    // errno is not set by posix_memalign, so decode rc instead of perror()
+    fprintf(stderr, "posix_memalign: %s\n", strerror(rc));
+    throw std::runtime_error("memory");
+  }
+  memset(p, 0, size);		// zero the memory
+  return p;
+}
+
+// shared_ptr deleter that releases malloc-family storage with free()
+class free_deleter { public: void operator()(void *p) { free(p); } };
+
+// Like qa_aligned_alloc, but the result is owned by a shared_ptr that frees it.
+static boost::shared_ptr<void>
+aligned_alloc_sptr(size_t size, size_t alignment = 128)
+{
+  return boost::shared_ptr<void>(qa_aligned_alloc(size, alignment), free_deleter());
+}
+
+// Forward FFT: check the SPE result against FFTW for sizes 2^5 through 2^12.
+void
+qa_gcp_fft_1d_r2::t1()
+{
+  gc_jm_options jm_opts;
+  jm_opts.nspes = 1;
+  jm_opts.program_handle = gc_program_handle_from_address(&gcell_all_spx);
+  gc_job_manager_sptr mgr = gc_make_job_manager(&jm_opts);
+
+#if 1	// set to 0 to run only the smallest size while debugging
+  for (int log2_n = 5; log2_n < 13; ++log2_n){
+    test(mgr, log2_n, true);
+  }
+#else
+  test(mgr, 5, true);
+#endif
+}
+
+// Inverse FFT: check the SPE result against FFTW for sizes 2^5 through 2^12.
+void
+qa_gcp_fft_1d_r2::t2()
+{
+  gc_jm_options jm_opts;
+  jm_opts.nspes = 1;
+  jm_opts.program_handle = gc_program_handle_from_address(&gcell_all_spx);
+  gc_job_manager_sptr mgr = gc_make_job_manager(&jm_opts);
+
+#if 1	// set to 0 to run only the smallest size while debugging
+  for (int log2_n = 5; log2_n < 13; ++log2_n){
+    test(mgr, log2_n, false);
+  }
+#else
+  test(mgr, 5, false);
+#endif
+}
+
+void
+qa_gcp_fft_1d_r2::t3()	// placeholder: empty body, so it passes without checking anything
+{
+  // FIXME Test fwd and inv with windowing option
+}
+
+void
+qa_gcp_fft_1d_r2::t4()	// placeholder: empty body, so it passes without checking anything
+{
+  // FIXME Test fwd and inv with shift option
+}
+
+// Larger of the absolute differences of the real parts and of the imaginary parts.
+static inline float abs_diff(std::complex<float> x, std::complex<float> y)
+{
+  const std::complex<float> d = x - y;
+  return std::max(std::abs(d.real()), std::abs(d.imag()));
+}
+
+// |ref - actual| / |ref|.  A reference whose magnitude is below 1e-18 is
+// replaced by 1e-18 before dividing, so the divisor is never zero.
+static float float_abs_rel_error(float ref, float actual)
+{
+  const float err = ref - actual;
+  const float denom = (std::abs(ref) < 1e-18) ? 1e-18 : ref;
+  return std::abs(err / denom);
+}
+
+static float
+abs_rel_error(std::complex<float> ref, std::complex<float> actual)
+{
+  return std::max(float_abs_rel_error(ref.real(), actual.real()),
+		  float_abs_rel_error(ref.imag(), actual.imag()));
+}
+
+/*
+ * Run one FFT of 2^log2_fft_size points on the SPE (forward or inverse, per
+ * `forward`) and with FFTW on identical pseudo-random input, then fail the
+ * CppUnit test if the largest per-component relative error exceeds 5e-3.
+ * Throws std::runtime_error if the FFTW plan cannot be created.
+ */
+void 
+qa_gcp_fft_1d_r2::test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward)
+{
+  const int fft_size = 1 << log2_fft_size;
+  const size_t nbytes = fft_size * sizeof(std::complex<float>);
+
+  // 128-byte aligned buffers owned by boost shared_ptr's; cast to complex below
+  void_sptr fftw_in_void = aligned_alloc_sptr(nbytes, 128);
+  void_sptr fftw_out_void = aligned_alloc_sptr(nbytes, 128);
+  void_sptr cell_in_void = aligned_alloc_sptr(nbytes, 128);
+  void_sptr cell_out_void = aligned_alloc_sptr(nbytes, 128);
+  void_sptr cell_twiddle_void = aligned_alloc_sptr(fft_size/4 * sizeof(std::complex<float>), 128);
+
+  std::complex<float> *fftw_in = (std::complex<float> *) fftw_in_void.get();
+  std::complex<float> *fftw_out = (std::complex<float> *) fftw_out_void.get();
+  std::complex<float> *cell_in = (std::complex<float> *) cell_in_void.get();
+  std::complex<float> *cell_out = (std::complex<float> *) cell_out_void.get();
+  std::complex<float> *cell_twiddle = (std::complex<float> *) cell_twiddle_void.get();
+
+  gcp_fft_1d_r2_twiddle(log2_fft_size, cell_twiddle);
+
+  // Fixed seed for reproducibility.  Draw real then imaginary in separate
+  // statements: as two arguments of one call their order would be unspecified.
+  srandom(1);
+  for (int i = 0; i < fft_size; i++){
+    const float re = (float) (random() & 0xfffff);
+    const float im = (float) (random() & 0xfffff);
+    fftw_in[i] = cell_in[i] = std::complex<float>(re, im);
+  }
+
+  // ---- compute the reference answer with FFTW ----
+  fftwf_plan plan = fftwf_plan_dft_1d (fft_size,
+                                       reinterpret_cast<fftwf_complex *>(fftw_in),
+                                       reinterpret_cast<fftwf_complex *>(fftw_out),
+                                       forward ? FFTW_FORWARD : FFTW_BACKWARD,
+                                       FFTW_ESTIMATE);
+  if (plan == 0){
+    fprintf(stderr, "qa_gcp_fft_1d_r2: error creating FFTW plan\n");
+    throw std::runtime_error ("fftwf_plan_dft_1d failed");	// the call made above
+  }
+  fftwf_execute(plan);
+  fftwf_destroy_plan(plan);
+
+  // ---- compute the answer on the cell (false / 0: presumably no shift, no window) ----
+  gc_job_desc_sptr jd = gcp_fft_1d_r2_submit(mgr, log2_fft_size, forward, false,
+                                             cell_out, cell_in, cell_twiddle, 0);
+  if (!mgr->wait_job(jd.get())){
+    fprintf(stderr, "wait_job failed: %s\n", gc_job_status_string(jd->status).c_str());
+    CPPUNIT_ASSERT(0);
+  }
+
+  // ---- compute the maximum of the relative error ----
+  float max_rel = 0.0;
+  for (int i = 0; i < fft_size; i++){
+    max_rel = std::max(max_rel, abs_rel_error(fftw_out[i], cell_out[i]));
+    if (0)	// change to 1 to dump reference, cell result and difference per bin
+      printf("(%16.3f, %16.3fj)  (%16.3f, %16.3fj)  (%16.3f, %16.3fj)\n",
+             fftw_out[i].real(), fftw_out[i].imag(), cell_out[i].real(), cell_out[i].imag(),
+             fftw_out[i].real() - cell_out[i].real(), fftw_out[i].imag() - cell_out[i].imag());
+  }
+
+  fprintf(stdout, "%s fft_size = %4d  max_rel_error = %e\n",
+          forward ? "fwd" : "rev", fft_size, max_rel);
+
+  CPPUNIT_ASSERT(max_rel <= 5e-3);
+}
diff --git a/gcell/lib/wrapper/qa_gcp_fft_1d_r2.h b/gcell/lib/wrapper/qa_gcp_fft_1d_r2.h
new file mode 100644
index 000000000..339ddd25a
--- /dev/null
+++ b/gcell/lib/wrapper/qa_gcp_fft_1d_r2.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_GCP_FFT_1D_R2_H
+#define INCLUDED_QA_GCP_FFT_1D_R2_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+#include <gcell/gc_job_manager.h>
+
+class qa_gcp_fft_1d_r2 : public CppUnit::TestCase {
+  // CppUnit fixture that checks the gcp_fft_1d_r2 SPE FFT against FFTW.
+  CPPUNIT_TEST_SUITE(qa_gcp_fft_1d_r2);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST(t2);
+  CPPUNIT_TEST(t3);
+  CPPUNIT_TEST(t4);
+  CPPUNIT_TEST_SUITE_END();
+
+ private:
+  void t1();	// forward FFT, sizes 2^5 .. 2^12
+  void t2();	// inverse FFT, sizes 2^5 .. 2^12
+  void t3();	// placeholder (windowing option), currently empty
+  void t4();	// placeholder (shift option), currently empty
+  // Compare one SPE FFT of 2^log2_fft_size points against the FFTW reference.
+  void test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward);
+};
+
+
+
+#endif /* INCLUDED_QA_GCP_FFT_1D_R2_H */
diff --git a/gcell/lib/wrapper/spu/gcs_fft_1d_r2.c b/gcell/lib/wrapper/spu/gcs_fft_1d_r2.c
new file mode 100644
index 000000000..582757ab0
--- /dev/null
+++ b/gcell/lib/wrapper/spu/gcs_fft_1d_r2.c
@@ -0,0 +1,94 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_declare_proc.h>
+#include <gcell/spu/libfft.h>
+#include <assert.h>
+
+/*
+ * Conjugate in place: each of the nelements vectors holds two complex<float>.
+ */
+static void
+conjugate_vector(vector float *v, int nelements)
+{
+  const vector float flip_imag = {1, -1, 1, -1};	/* (re, im, re, im) lanes */
+  int n;
+  for (n = 0; n < nelements; n++)
+    v[n] = v[n] * flip_imag;
+}
+
+/*
+ * SPE proc "fwd_fft_1d_r2": forward FFT of 2^input->arg[0] points from EA arg 1
+ * (in) to EA arg 0 (out) using the twiddles in EA arg 2.  Window (EA arg 3) and
+ * fftshift (input->arg[1]) are unimplemented: assert(0), a no-op under NDEBUG.
+ */
+static void gcs_fwd_fft_1d_r2(const gc_job_direct_args_t *input,
+                              gc_job_direct_args_t *output __attribute__((unused)),
+                              const gc_job_ea_args_t *eaa)
+{
+  vector float *out = (vector float *) eaa->arg[0].ls_addr;	// complex
+  vector float *in = (vector float *) eaa->arg[1].ls_addr;	// complex
+  vector float *twiddle = (vector float *) eaa->arg[2].ls_addr;	// complex
+  int log2_fft_length = input->arg[0].u32;
+  int shift = input->arg[1].u32;	// non-zero if we should apply fftshift
+
+  if (eaa->arg[3].get_size){	// a window was supplied
+    // FIXME pointwise multiply in *= window (floats at eaa->arg[3].ls_addr)
+    assert(0);
+  }
+  fft_1d_r2(out, in, twiddle, log2_fft_length);
+  if (shift){
+    // FIXME apply "fftshift" to output data in-place
+    assert(0);
+  }
+}
+
+GC_DECLARE_PROC(gcs_fwd_fft_1d_r2, "fwd_fft_1d_r2");
+
+/*
+ * SPE proc "inv_fft_1d_r2": inverse FFT of 2^input->arg[0] points, computed as
+ * conj(fft(conj(in))); the `in` buffer (EA arg 1) is conjugated in place.
+ * Window (EA arg 3) and fftshift (input->arg[1]) are unimplemented: assert(0).
+ */
+static void gcs_inv_fft_1d_r2(const gc_job_direct_args_t *input,
+                              gc_job_direct_args_t *output __attribute__((unused)),
+                              const gc_job_ea_args_t *eaa)
+{
+  vector float *out = (vector float *) eaa->arg[0].ls_addr;	// complex
+  vector float *in = (vector float *) eaa->arg[1].ls_addr;	// complex
+  vector float *twiddle = (vector float *) eaa->arg[2].ls_addr;	// complex
+  int log2_fft_length = input->arg[0].u32;
+  int shift = input->arg[1].u32;	// non-zero if we should apply fftshift
+
+  if (eaa->arg[3].get_size){	// a window was supplied
+    // FIXME pointwise multiply in *= window (floats at eaa->arg[3].ls_addr)
+    assert(0);
+  }
+  if (shift){
+    // FIXME apply "fftshift" to input data in-place
+    assert(0);
+  }
+  conjugate_vector(in, 1 << (log2_fft_length - 1));	// N/2 vectors: two complex each
+  fft_1d_r2(out, in, twiddle, log2_fft_length);
+  conjugate_vector(out, 1 << (log2_fft_length - 1));
+}
+
+GC_DECLARE_PROC(gcs_inv_fft_1d_r2, "inv_fft_1d_r2");