summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.common3
-rw-r--r--config/grc_gcell.m46
-rw-r--r--gcell/src/apps/benchmark_dma.cc6
-rw-r--r--gcell/src/apps/benchmark_nop.cc8
-rw-r--r--gcell/src/apps/test_all.cc7
-rw-r--r--gcell/src/lib/Makefile.am4
-rw-r--r--gcell/src/lib/general/Makefile.am1
-rw-r--r--gcell/src/lib/general/spu/fft_1d.h103
-rw-r--r--gcell/src/lib/general/spu/fft_1d_r2.c35
-rw-r--r--gcell/src/lib/general/spu/fft_1d_r2.h529
-rw-r--r--gcell/src/lib/general/spu/libfft.h113
-rw-r--r--gcell/src/lib/runtime/Makefile.am28
-rw-r--r--gcell/src/lib/runtime/gc_job_manager.cc112
-rw-r--r--gcell/src/lib/runtime/gc_job_manager.h116
-rw-r--r--gcell/src/lib/runtime/gc_job_manager_impl.cc29
-rw-r--r--gcell/src/lib/runtime/gc_job_manager_impl.h5
-rwxr-xr-xgcell/src/lib/runtime/gcell-embedspu-libtool29
-rw-r--r--gcell/src/lib/runtime/qa_gcell_runtime.cc (renamed from gcell/src/lib/runtime/qa_lib.cc)6
-rw-r--r--gcell/src/lib/runtime/qa_gcell_runtime.h (renamed from gcell/src/lib/runtime/qa_lib.h)10
-rw-r--r--gcell/src/lib/runtime/qa_job_manager.cc97
-rw-r--r--gcell/src/lib/runtime/spu/gc_spu_config.h2
-rw-r--r--gcell/src/lib/runtime/spu/gcell_runtime_qa.c (renamed from gcell/src/lib/runtime/spu/gcell_qa.c)0
-rw-r--r--gcell/src/lib/spu/Makefile.am41
-rw-r--r--gcell/src/lib/wrapper/Makefile.am (renamed from gcell/src/lib/procs/Makefile.am)28
-rw-r--r--gcell/src/lib/wrapper/gcp_fft_1d_r2.cc116
-rw-r--r--gcell/src/lib/wrapper/gcp_fft_1d_r2.h66
-rw-r--r--gcell/src/lib/wrapper/qa_gcell_wrapper.cc39
-rw-r--r--gcell/src/lib/wrapper/qa_gcell_wrapper.h35
-rw-r--r--gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc211
-rw-r--r--gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h48
-rw-r--r--gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c39
31 files changed, 1723 insertions, 149 deletions
diff --git a/Makefile.common b/Makefile.common
index ace03b90c..0ee6f2bf3 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -96,6 +96,9 @@ GCELL_LA = @gcell_LA@
GCELL_SPU_INCLUDES = @gcell_spu_INCLUDES@
GCELL_SPU_LA = @gcell_spu_LA@
+# libtool aware wrapper for ppu-embedspu
+GCELL_EMBEDSPU_LIBTOOL = @abs_top_srcdir@/gcell/src/lib/runtime/gcell-embedspu-libtool
+
# This used to be set in configure.ac but is now defined here for all
# Makefiles when this fragment is included.
STD_DEFINES_AND_INCLUDES=$(DEFINES) $(OMNITHREAD_INCLUDES) $(GNURADIO_INCLUDES) $(BOOST_CFLAGS)
diff --git a/config/grc_gcell.m4 b/config/grc_gcell.m4
index 6d7144131..a229211aa 100644
--- a/config/grc_gcell.m4
+++ b/config/grc_gcell.m4
@@ -55,13 +55,13 @@ AC_DEFUN([GRC_GCELL],[
gcell_INCLUDES="-I\${abs_top_srcdir}/gcell/src/include \
-I\${abs_top_srcdir}/gcell/src/lib/runtime \
-I\${abs_top_srcdir}/gcell/src/lib/general \
- -I\${abs_top_srcdir}/gcell/src/lib/procs"
+ -I\${abs_top_srcdir}/gcell/src/lib/wrapper"
gcell_LA="\${abs_top_builddir}/gcell/src/lib/libgcell.la"
gcell_spu_INCLUDES="-I\${abs_top_srcdir}/gcell/src/include/spu \
-I\${abs_top_srcdir}/gcell/src/include \
-I\${abs_top_srcdir}/gcell/src/lib/runtime/spu \
-I\${abs_top_srcdir}/gcell/src/lib/general/spu \
- -I\${abs_top_srcdir}/gcell/src/lib/procs/spu"
+ -I\${abs_top_srcdir}/gcell/src/lib/wrapper/spu"
gcell_spu_LA="\${abs_top_builddir}/gcell/src/lib/spu/libgcell_spu.a"
AC_SUBST(gcell_spu_INCLUDES)
AC_SUBST(gcell_spu_LA)
@@ -76,7 +76,7 @@ AC_DEFUN([GRC_GCELL],[
gcell/src/lib/Makefile \
gcell/src/lib/spu/Makefile \
gcell/src/lib/general/Makefile \
- gcell/src/lib/procs/Makefile \
+ gcell/src/lib/wrapper/Makefile \
gcell/src/lib/runtime/Makefile \
gcell/src/apps/Makefile \
gcell/src/apps/spu/Makefile \
diff --git a/gcell/src/apps/benchmark_dma.cc b/gcell/src/apps/benchmark_dma.cc
index b0af8b74b..961876ad8 100644
--- a/gcell/src/apps/benchmark_dma.cc
+++ b/gcell/src/apps/benchmark_dma.cc
@@ -121,11 +121,11 @@ run_test(unsigned int nspes, unsigned int usecs, unsigned int dma_size, int getp
}
gc_jm_options opts;
- opts.program_handle = &benchmark_procs;
+ opts.program_handle = gc_program_handle_from_address(&benchmark_procs);
opts.nspes = nspes;
//opts.enable_logging = true;
//opts.log2_nlog_entries = 13;
- gc_job_manager *mgr = gc_make_job_manager(&opts);
+ gc_job_manager_sptr mgr = gc_make_job_manager(&opts);
if ((gcp_benchmark_udelay = mgr->lookup_proc("benchmark_udelay")) == GCP_UNKNOWN_PROC){
fprintf(stderr, "lookup_proc: failed to find \"benchmark_udelay\"\n");
@@ -211,8 +211,6 @@ run_test(unsigned int nspes, unsigned int usecs, unsigned int dma_size, int getp
(double) njobs * dma_size / delta * (getput_mask == BENCHMARK_GET_PUT ? 2.0 : 1.0));
}
-
- delete mgr;
}
static void
diff --git a/gcell/src/apps/benchmark_nop.cc b/gcell/src/apps/benchmark_nop.cc
index 2d3611fd9..b87137d50 100644
--- a/gcell/src/apps/benchmark_nop.cc
+++ b/gcell/src/apps/benchmark_nop.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -57,10 +57,10 @@ run_test(unsigned int nspes, unsigned int usecs, int njobs)
bool done[NJDS];
gc_jm_options opts;
- opts.program_handle = &benchmark_procs;
+ opts.program_handle = gc_program_handle_from_address(&benchmark_procs);
opts.nspes = nspes;
opts.gang_schedule = true;
- gc_job_manager *mgr = gc_make_job_manager(&opts);
+ gc_job_manager_sptr mgr = gc_make_job_manager(&opts);
if ((gcp_benchmark_udelay = mgr->lookup_proc("benchmark_udelay")) == GCP_UNKNOWN_PROC){
fprintf(stderr, "lookup_proc: failed to find \"benchmark_udelay\"\n");
@@ -127,8 +127,6 @@ run_test(unsigned int nspes, unsigned int usecs, int njobs)
printf("nspes: %2d udelay: %4d elapsed_time: %7.3f njobs: %g speedup: %6.3f\n",
mgr->nspes(), usecs, delta, (double) njobs,
njobs * usecs * 1e-6 / delta);
-
- delete mgr;
}
int
diff --git a/gcell/src/apps/test_all.cc b/gcell/src/apps/test_all.cc
index e652de21d..798549be1 100644
--- a/gcell/src/apps/test_all.cc
+++ b/gcell/src/apps/test_all.cc
@@ -21,8 +21,8 @@
#include <cppunit/TextTestRunner.h>
-#include <qa_lib.h>
-
+#include <qa_gcell_runtime.h>
+#include <qa_gcell_wrapper.h>
int
main(int argc, char **argv)
@@ -30,7 +30,8 @@ main(int argc, char **argv)
CppUnit::TextTestRunner runner;
- runner.addTest(qa_lib::suite());
+ runner.addTest(qa_gcell_runtime::suite());
+ runner.addTest(qa_gcell_wrapper::suite());
bool was_successful = runner.run("", false);
diff --git a/gcell/src/lib/Makefile.am b/gcell/src/lib/Makefile.am
index 2ccedc332..e7b349331 100644
--- a/gcell/src/lib/Makefile.am
+++ b/gcell/src/lib/Makefile.am
@@ -20,7 +20,7 @@
include $(top_srcdir)/Makefile.common
-SUBDIRS = spu runtime general procs .
+SUBDIRS = spu runtime general wrapper .
# generate libgcell.la from the convenience libraries in subdirs
@@ -34,11 +34,13 @@ libgcell_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
libgcell_la_LIBADD = \
runtime/libruntime.la \
+ wrapper/libwrapper.la \
-lspe2 \
$(OMNITHREAD_LA)
libgcell_qa_la_LIBADD = \
runtime/libruntime-qa.la \
+ wrapper/libwrapper-qa.la \
$(CPPUNIT_LIBS)
diff --git a/gcell/src/lib/general/Makefile.am b/gcell/src/lib/general/Makefile.am
index 0e32ffc37..bd5a4de62 100644
--- a/gcell/src/lib/general/Makefile.am
+++ b/gcell/src/lib/general/Makefile.am
@@ -20,5 +20,4 @@
include $(top_srcdir)/Makefile.common
-# SUBDIRS = spu .
diff --git a/gcell/src/lib/general/spu/fft_1d.h b/gcell/src/lib/general/spu/fft_1d.h
new file mode 100644
index 000000000..355b84bf1
--- /dev/null
+++ b/gcell/src/lib/general/spu/fft_1d.h
@@ -0,0 +1,103 @@
+/* -------------------------------------------------------------- */
+/* (C)Copyright 2001,2007, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment, Incorporated, */
+/* Toshiba Corporation, */
+/* */
+/* All Rights Reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the */
+/* following conditions are met: */
+/* */
+/* - Redistributions of source code must retain the above copyright*/
+/* notice, this list of conditions and the following disclaimer. */
+/* */
+/* - Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* - Neither the name of IBM Corporation nor the names of its */
+/* contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written */
+/* permission. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
+/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
+/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
+/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
+/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
+/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
+/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
+/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
+/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _FFT_1D_H_
+#define _FFT_1D_H_ 1
+
+#include <spu_intrinsics.h>
+
+/* BIT_SWAP - swaps up to 16 bits of the integer _i according to the
+ * pattern specified by _pat.
+ */
+#define BIT_SWAP(_i, _pat) spu_extract(spu_gather(spu_shuffle(spu_maskb(_i), _pat, _pat)), 0)
+
+
+#ifndef MAX_FFT_1D_SIZE
+#define MAX_FFT_1D_SIZE 8192
+#endif
+
+#ifndef INV_SQRT_2
+#define INV_SQRT_2 0.7071067811865
+#endif
+
+
+/* The following macro, FFT_1D_BUTTERFLY, performs a 4 way SIMD basic butterfly
+ * operation. The inputs are in parallel arrays (separate real and imaginary
+ * vectors).
+ *
+ * p --------------------------> P = p + q*Wi
+ * \ /
+ * \ /
+ * \ /
+ * \/
+ * /\
+ * / \
+ * / \
+ * ____ / \
+ * q --| Wi |-----------------> Q = p - q*Wi
+ * ----
+ */
+
+#define FFT_1D_BUTTERFLY(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \
+ vector float _qw_re, _qw_im; \
+ \
+ _qw_re = spu_msub(_q_re, _W_re, spu_mul(_q_im, _W_im)); \
+ _qw_im = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \
+ _P_re = spu_add(_p_re, _qw_re); \
+ _P_im = spu_add(_p_im, _qw_im); \
+ _Q_re = spu_sub(_p_re, _qw_re); \
+ _Q_im = spu_sub(_p_im, _qw_im); \
+}
+
+
+/* FFT_1D_BUTTERFLY_HI is equivalent to FFT_1D_BUTTERFLY with twiddle factors (W_im, -W_re)
+ */
+#define FFT_1D_BUTTERFLY_HI(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \
+ vector float _qw_re, _qw_im; \
+ \
+ _qw_re = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \
+ _qw_im = spu_msub(_q_im, _W_im, spu_mul(_q_re, _W_re)); \
+ _P_re = spu_add(_p_re, _qw_re); \
+ _P_im = spu_add(_p_im, _qw_im); \
+ _Q_re = spu_sub(_p_re, _qw_re); \
+ _Q_im = spu_sub(_p_im, _qw_im); \
+}
+
+#endif /* _FFT_1D_H_ */
diff --git a/gcell/src/lib/general/spu/fft_1d_r2.c b/gcell/src/lib/general/spu/fft_1d_r2.c
new file mode 100644
index 000000000..a0660b307
--- /dev/null
+++ b/gcell/src/lib/general/spu/fft_1d_r2.c
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <libfft.h>
+#include <fft_1d_r2.h>
+#include <assert.h>
+
+/*
+ * Invoke the inline version. log2_size must give a length from 32 (forced by
+ * the unrolled final stage of _fft_1d_r2) up to MAX_FFT_1D_SIZE (its scratch).
+ */
+void
+fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size)
+{
+ assert(log2_size >= 5 && (1 << log2_size) <= MAX_FFT_1D_SIZE);
+ _fft_1d_r2(out, in, W, log2_size);
+}
diff --git a/gcell/src/lib/general/spu/fft_1d_r2.h b/gcell/src/lib/general/spu/fft_1d_r2.h
new file mode 100644
index 000000000..a51cbc341
--- /dev/null
+++ b/gcell/src/lib/general/spu/fft_1d_r2.h
@@ -0,0 +1,529 @@
+/* -------------------------------------------------------------- */
+/* (C)Copyright 2001,2007, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment, Incorporated, */
+/* Toshiba Corporation, */
+/* */
+/* All Rights Reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the */
+/* following conditions are met: */
+/* */
+/* - Redistributions of source code must retain the above copyright*/
+/* notice, this list of conditions and the following disclaimer. */
+/* */
+/* - Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* - Neither the name of IBM Corporation nor the names of its */
+/* contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written */
+/* permission. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
+/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
+/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
+/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
+/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
+/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
+/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
+/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
+/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _FFT_1D_R2_H_
+#define _FFT_1D_R2_H_ 1
+
+#include "fft_1d.h"
+
+/* fft_1d_r2
+ * ---------
+ * Performs a single precision, complex Fast Fourier Transform using
+ * the DFT (Discrete Fourier Transform) with radix-2 decimation in time.
+ * The input <in> is an array of complex numbers of length (1<<log2_size)
+ * entries. The result is returned in the array of complex numbers specified
+ * by <out>. Note: This routine can support an in-place transformation
+ * by specifying <in> and <out> to be the same array.
+ *
+ * This implementation utilizes the Cooley-Tukey algorithm consisting
+ * of <log2_size> stages. The basic operation is the butterfly.
+ *
+ * p --------------------------> P = p + q*Wi
+ * \ /
+ * \ /
+ * \ /
+ * \/
+ * /\
+ * / \
+ * / \
+ * ____ / \
+ * q --| Wi |-----------------> Q = p - q*Wi
+ * ----
+ *
+ * This routine also requires pre-computed twiddle values, W. W is an
+ * array of single precision complex numbers of length 1<<(log2_size-2)
+ * and is computed as follows:
+ *
+ * for (i=0; i<n/4; i++) {
+ * W[i].real = cos(i * 2*PI/n);
+ * W[i].imag = -sin(i * 2*PI/n);
+ * }
+ *
+ * This array actually only contains the first half of the twiddle
+ * factors. Due to symmetry, the second half of the twiddle factors
+ * is implied and equal to:
+ *
+ * for (i=0; i<n/4; i++) {
+ * W[i+n/4].real = W[i].imag = -sin(i * 2*PI/n);
+ * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n);
+ * }
+ *
+ * Further symmetry allows one to generate the twiddle factor table
+ * using half the number of trig computations as follows:
+ *
+ * W[0].real = 1.0;
+ * W[0].imag = 0.0;
+ * for (i=1; i<n/4; i++) {
+ * W[i].real = cos(i * 2*PI/n);
+ * W[n/4 - i].imag = -W[i].real;
+ * }
+ *
+ * The complex numbers are packed into quadwords as follows:
+ *
+ * quadword complex
+ * array element array elements
+ * -----------------------------------------------------
+ * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 |
+ * -----------------------------------------------------
+ *
+ */
+
+
+static __inline void _fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size)
+{
+ int i, j, k;
+ int stage, offset;
+ int i_rev;
+ int n, n_2, n_4, n_8, n_16, n_3_16;
+ int w_stride, w_2stride, w_3stride, w_4stride;
+ int stride, stride_2, stride_4, stride_3_4;
+ vector float *W0, *W1, *W2, *W3;
+ vector float *re0, *re1, *re2, *re3;
+ vector float *im0, *im1, *im2, *im3;
+ vector float *in0, *in1, *in2, *in3, *in4, *in5, *in6, *in7;
+ vector float *out0, *out1, *out2, *out3;
+ vector float tmp0, tmp1;
+ vector float w0_re, w0_im, w1_re, w1_im;
+ vector float w0, w1, w2, w3;
+ vector float src_lo0, src_lo1, src_lo2, src_lo3;
+ vector float src_hi0, src_hi1, src_hi2, src_hi3;
+ vector float dst_lo0, dst_lo1, dst_lo2, dst_lo3;
+ vector float dst_hi0, dst_hi1, dst_hi2, dst_hi3;
+ vector float out_re_lo0, out_re_lo1, out_re_lo2, out_re_lo3;
+ vector float out_im_lo0, out_im_lo1, out_im_lo2, out_im_lo3;
+ vector float out_re_hi0, out_re_hi1, out_re_hi2, out_re_hi3;
+ vector float out_im_hi0, out_im_hi1, out_im_hi2, out_im_hi3;
+ vector float re_lo0, re_lo1, re_lo2, re_lo3;
+ vector float im_lo0, im_lo1, im_lo2, im_lo3;
+ vector float re_hi0, re_hi1, re_hi2, re_hi3;
+ vector float im_hi0, im_hi1, im_hi2, im_hi3;
+ vector float pq_lo0, pq_lo1, pq_lo2, pq_lo3;
+ vector float pq_hi0, pq_hi1, pq_hi2, pq_hi3;
+ vector float re[MAX_FFT_1D_SIZE/4], im[MAX_FFT_1D_SIZE/4]; /* real & imaginary working arrays */
+ vector float ppmm = (vector float) { 1.0f, 1.0f, -1.0f, -1.0f};
+ vector float pmmp = (vector float) { 1.0f, -1.0f, -1.0f, 1.0f};
+ vector unsigned char reverse;
+ vector unsigned char shuf_lo = (vector unsigned char) {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 16,17,18,19, 20,21,22,23};
+ vector unsigned char shuf_hi = (vector unsigned char) {
+ 8, 9,10,11, 12,13,14,15,
+ 24,25,26,27, 28,29,30,31};
+ vector unsigned char shuf_0202 = (vector unsigned char) {
+ 0, 1, 2, 3, 8, 9,10,11,
+ 0, 1, 2, 3, 8, 9,10,11};
+ vector unsigned char shuf_1313 = (vector unsigned char) {
+ 4, 5, 6, 7, 12,13,14,15,
+ 4, 5, 6, 7, 12,13,14,15};
+ vector unsigned char shuf_0303 = (vector unsigned char) {
+ 0, 1, 2, 3, 12,13,14,15,
+ 0, 1, 2, 3, 12,13,14,15};
+ vector unsigned char shuf_1212 = (vector unsigned char) {
+ 4, 5, 6, 7, 8, 9,10,11,
+ 4, 5, 6, 7, 8, 9,10,11};
+ vector unsigned char shuf_0415 = (vector unsigned char) {
+ 0, 1, 2, 3, 16,17,18,19,
+ 4, 5, 6, 7, 20,21,22,23};
+ vector unsigned char shuf_2637 = (vector unsigned char) {
+ 8, 9,10,11, 24,25,26,27,
+ 12,13,14,15,28,29,30,31};
+ vector unsigned char shuf_0246 = (vector unsigned char) {
+ 0, 1, 2, 3, 8, 9,10,11,
+ 16,17,18,19,24,25,26,27};
+ vector unsigned char shuf_1357 = (vector unsigned char) {
+ 4, 5, 6, 7, 12,13,14,15,
+ 20,21,22,23,28,29,30,31};
+
+ n = 1 << log2_size;
+ n_2 = n >> 1;
+ n_4 = n >> 2;
+ n_8 = n >> 3;
+ n_16 = n >> 4;
+
+ n_3_16 = n_8 + n_16;
+
+ /* Compute a byte reverse shuffle pattern to be used to produce
+ * an address bit swap.
+ */
+ reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size),
+ spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8,
+ 7, 6, 5, 4, 3, 2,1,0}),
+ log2_size-16));
+
+ /* Perform the first 3 stages of the FFT. These stages differ from
+ * other stages in that the inputs are unscrambled and the data is
+ * reformatted into parallel arrays (i.e., separate real and imaginary
+ * arrays). The term "unscramble" means to bit-reverse the addresses of
+ * the data array. In addition, the first three stages have simple twiddle
+ * weighting factors.
+ * stage 1: (1, 0)
+ * stage 2: (1, 0) and (0, -1)
+ * stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707)
+ *
+ * The arrays are processed as two halves simultaneously: the lo (first
+ * half) and the hi (second half). This is done because the scramble
+ * shares source values between the two halves of the output arrays.
+ */
+ i = 0;
+ i_rev = 0;
+
+ in0 = in;
+ in1 = in + n_8;
+ in2 = in + n_16;
+ in3 = in + n_3_16;
+
+ in4 = in + n_4;
+ in5 = in1 + n_4;
+ in6 = in2 + n_4;
+ in7 = in3 + n_4;
+
+ re0 = re;
+ re1 = re + n_8;
+ im0 = im;
+ im1 = im + n_8;
+
+ w0_re = (vector float) { 1.0f, INV_SQRT_2, 0.0f, -INV_SQRT_2};
+ w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2};
+
+ do {
+ src_lo0 = in0[i_rev];
+ src_lo1 = in1[i_rev];
+ src_lo2 = in2[i_rev];
+ src_lo3 = in3[i_rev];
+
+ src_hi0 = in4[i_rev];
+ src_hi1 = in5[i_rev];
+ src_hi2 = in6[i_rev];
+ src_hi3 = in7[i_rev];
+
+ /* Perform scramble.
+ */
+ dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo);
+ dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi);
+ dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo);
+ dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi);
+ dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo);
+ dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi);
+ dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo);
+ dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi);
+
+ /* Perform the stage 1 butterfly. The multiplier constant, ppmm,
+ * is used to control the sign of the operands since a single
+ * quadword contains both the P and Q values of the butterfly.
+ */
+ pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8));
+ pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8));
+ pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8));
+ pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8));
+ pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8));
+ pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8));
+ pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8));
+ pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8));
+
+ /* Perform the stage 2 butterfly. For this stage, the
+ * inputs pq are still interleaved (p.real, p.imag, q.real,
+ * q.imag), so we must first re-order the data into
+ * parallel arrays as well as perform the reorder
+ * associated with the twiddle W[n/4], which equals
+ * (0, -1).
+ *
+ * ie. (A, B) * (0, -1) => (B, -A)
+ */
+ re_lo0 = spu_madd(ppmm,
+ spu_shuffle(pq_lo1, pq_lo1, shuf_0303),
+ spu_shuffle(pq_lo0, pq_lo0, shuf_0202));
+ im_lo0 = spu_madd(pmmp,
+ spu_shuffle(pq_lo1, pq_lo1, shuf_1212),
+ spu_shuffle(pq_lo0, pq_lo0, shuf_1313));
+
+ re_lo1 = spu_madd(ppmm,
+ spu_shuffle(pq_lo3, pq_lo3, shuf_0303),
+ spu_shuffle(pq_lo2, pq_lo2, shuf_0202));
+ im_lo1 = spu_madd(pmmp,
+ spu_shuffle(pq_lo3, pq_lo3, shuf_1212),
+ spu_shuffle(pq_lo2, pq_lo2, shuf_1313));
+
+
+ re_hi0 = spu_madd(ppmm,
+ spu_shuffle(pq_hi1, pq_hi1, shuf_0303),
+ spu_shuffle(pq_hi0, pq_hi0, shuf_0202));
+ im_hi0 = spu_madd(pmmp,
+ spu_shuffle(pq_hi1, pq_hi1, shuf_1212),
+ spu_shuffle(pq_hi0, pq_hi0, shuf_1313));
+
+ re_hi1 = spu_madd(ppmm,
+ spu_shuffle(pq_hi3, pq_hi3, shuf_0303),
+ spu_shuffle(pq_hi2, pq_hi2, shuf_0202));
+ im_hi1 = spu_madd(pmmp,
+ spu_shuffle(pq_hi3, pq_hi3, shuf_1212),
+ spu_shuffle(pq_hi2, pq_hi2, shuf_1313));
+
+
+ /* Perform stage 3 butterfly.
+ */
+ FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+
+ re0 += 2;
+ re1 += 2;
+ im0 += 2;
+ im1 += 2;
+
+ i += 8;
+ i_rev = BIT_SWAP(i, reverse) / 2;
+ } while (i < n_2);
+
+ /* Process stages 4 to log2_size-2
+ */
+ for (stage=4, stride=4; stage<log2_size-1; stage++, stride += stride) {
+ w_stride = n_2 >> stage;
+ w_2stride = n >> stage;
+ w_3stride = w_stride + w_2stride;
+ w_4stride = w_2stride + w_2stride;
+
+ W0 = W;
+ W1 = W + w_stride;
+ W2 = W + w_2stride;
+ W3 = W + w_3stride;
+
+ stride_2 = stride >> 1;
+ stride_4 = stride >> 2;
+ stride_3_4 = stride_2 + stride_4;
+
+ re0 = re; im0 = im;
+ re1 = re + stride_2; im1 = im + stride_2;
+ re2 = re + stride_4; im2 = im + stride_4;
+ re3 = re + stride_3_4; im3 = im + stride_3_4;
+
+ for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) {
+ /* Compute the twiddle factors
+ */
+ w0 = W0[offset];
+ w1 = W1[offset];
+ w2 = W2[offset];
+ w3 = W3[offset];
+
+ tmp0 = spu_shuffle(w0, w2, shuf_0415);
+ tmp1 = spu_shuffle(w1, w3, shuf_0415);
+
+ w0_re = spu_shuffle(tmp0, tmp1, shuf_0415);
+ w0_im = spu_shuffle(tmp0, tmp1, shuf_2637);
+
+ j = i;
+ k = i + stride;
+ do {
+ re_lo0 = re0[j]; im_lo0 = im0[j];
+ re_lo1 = re1[j]; im_lo1 = im1[j];
+
+ re_hi0 = re2[j]; im_hi0 = im2[j];
+ re_hi1 = re3[j]; im_hi1 = im3[j];
+
+ re_lo2 = re0[k]; im_lo2 = im0[k];
+ re_lo3 = re1[k]; im_lo3 = im1[k];
+
+ re_hi2 = re2[k]; im_hi2 = im2[k];
+ re_hi3 = re3[k]; im_hi3 = im3[k];
+
+ FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+
+ FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im);
+
+ j += 2 * stride;
+ k += 2 * stride;
+ } while (j < n_4);
+ }
+ }
+
+ /* Process stage log2_size-1. This is identical to the stage processing above
+ * except for this stage the inner loop is only executed once so it is removed
+ * entirely.
+ */
+ w_stride = n_2 >> stage;
+ w_2stride = n >> stage;
+ w_3stride = w_stride + w_2stride;
+ w_4stride = w_2stride + w_2stride;
+
+ stride_2 = stride >> 1;
+ stride_4 = stride >> 2;
+
+ stride_3_4 = stride_2 + stride_4;
+
+ re0 = re; im0 = im;
+ re1 = re + stride_2; im1 = im + stride_2;
+ re2 = re + stride_4; im2 = im + stride_4;
+ re3 = re + stride_3_4; im3 = im + stride_3_4;
+
+ for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) {
+ /* Compute the twiddle factors
+ */
+ w0 = W[offset];
+ w1 = W[offset + w_stride];
+ w2 = W[offset + w_2stride];
+ w3 = W[offset + w_3stride];
+
+ tmp0 = spu_shuffle(w0, w2, shuf_0415);
+ tmp1 = spu_shuffle(w1, w3, shuf_0415);
+
+ w0_re = spu_shuffle(tmp0, tmp1, shuf_0415);
+ w0_im = spu_shuffle(tmp0, tmp1, shuf_2637);
+
+ j = i;
+ k = i + stride;
+
+ re_lo0 = re0[j]; im_lo0 = im0[j];
+ re_lo1 = re1[j]; im_lo1 = im1[j];
+
+ re_hi0 = re2[j]; im_hi0 = im2[j];
+ re_hi1 = re3[j]; im_hi1 = im3[j];
+
+ re_lo2 = re0[k]; im_lo2 = im0[k];
+ re_lo3 = re1[k]; im_lo3 = im1[k];
+
+ re_hi2 = re2[k]; im_hi2 = im2[k];
+ re_hi3 = re3[k]; im_hi3 = im3[k];
+
+ FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+
+ FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im);
+ }
+
+
+ /* Process the final stage (stage log2_size). For this stage,
+ * reformat the data from parallel arrays back into
+ * interleaved arrays, storing the result into <out>.
+ *
+ * This loop has been manually unrolled by 2 to improve
+ * dual issue rates and reduce stalls. This unrolling
+ * forces a minimum FFT size of 32.
+ */
+ re0 = re;
+ re1 = re + n_8;
+ re2 = re + n_16;
+ re3 = re + n_3_16;
+
+ im0 = im;
+ im1 = im + n_8;
+ im2 = im + n_16;
+ im3 = im + n_3_16;
+
+ out0 = out;
+ out1 = out + n_4;
+ out2 = out + n_8;
+ out3 = out1 + n_8;
+
+ i = n_16;
+
+ do {
+ /* Fetch the twiddle factors
+ */
+ w0 = W[0];
+ w1 = W[1];
+ w2 = W[2];
+ w3 = W[3];
+
+ W += 4;
+
+ w0_re = spu_shuffle(w0, w1, shuf_0246);
+ w0_im = spu_shuffle(w0, w1, shuf_1357);
+ w1_re = spu_shuffle(w2, w3, shuf_0246);
+ w1_im = spu_shuffle(w2, w3, shuf_1357);
+
+ /* Fetch the butterfly inputs, reals and imaginaries
+ */
+ re_lo0 = re0[0]; im_lo0 = im0[0];
+ re_lo1 = re1[0]; im_lo1 = im1[0];
+ re_lo2 = re0[1]; im_lo2 = im0[1];
+ re_lo3 = re1[1]; im_lo3 = im1[1];
+
+ re_hi0 = re2[0]; im_hi0 = im2[0];
+ re_hi1 = re3[0]; im_hi1 = im3[0];
+ re_hi2 = re2[1]; im_hi2 = im2[1];
+ re_hi3 = re3[1]; im_hi3 = im3[1];
+
+ re0 += 2; im0 += 2;
+ re1 += 2; im1 += 2;
+ re2 += 2; im2 += 2;
+ re3 += 2; im3 += 2;
+
+ /* Perform the butterflies
+ */
+ FFT_1D_BUTTERFLY (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im);
+
+ FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im);
+
+ /* Interleave the results and store them into the output buffers (i.e.,
+ * the original input buffers when the transform is performed in place).
+ */
+ out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415);
+ out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637);
+ out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415);
+ out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637);
+
+ out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415);
+ out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637);
+ out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415);
+ out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637);
+
+ out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415);
+ out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637);
+ out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415);
+ out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637);
+
+ out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415);
+ out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637);
+ out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415);
+ out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637);
+
+ out0 += 4;
+ out1 += 4;
+ out2 += 4;
+ out3 += 4;
+
+ i -= 2;
+ } while (i);
+}
+
+#endif /* _FFT_1D_R2_H_ */
diff --git a/gcell/src/lib/general/spu/libfft.h b/gcell/src/lib/general/spu/libfft.h
new file mode 100644
index 000000000..dd387be0c
--- /dev/null
+++ b/gcell/src/lib/general/spu/libfft.h
@@ -0,0 +1,113 @@
+/* -------------------------------------------------------------- */
+/* (C)Copyright 2008 Free Software Foundation, Inc. */
+/* (C)Copyright 2001,2007, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment, Incorporated, */
+/* Toshiba Corporation, */
+/* */
+/* All Rights Reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the */
+/* following conditions are met: */
+/* */
+/* - Redistributions of source code must retain the above copyright*/
+/* notice, this list of conditions and the following disclaimer. */
+/* */
+/* - Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* - Neither the name of IBM Corporation nor the names of its */
+/* contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written */
+/* permission. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
+/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
+/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
+/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
+/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
+/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
+/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
+/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
+/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+
+#ifndef INCLUDED_LIBFFT_H
+#define INCLUDED_LIBFFT_H
+
+// must be defined before inclusion of fft_1d_r2.h
+#define MAX_FFT_1D_SIZE 4096
+
+/* fft_1d_r2
+ * ---------
+ * Performs a single precision, complex Fast Fourier Transform using
+ * the DFT (Discrete Fourier Transform) with radix-2 decimation in time.
+ * The input <in> is an array of complex numbers of length (1<<log2_size)
+ * entries. The result is returned in the array of complex numbers specified
+ * by <out>. Note: This routine can support an in-place transformation
+ * by specifying <in> and <out> to be the same array.
+ *
+ * This implementation utilizes the Cooley-Tukey algorithm consisting
+ * of <log2_size> stages. The basic operation is the butterfly.
+ *
+ * p --------------------------> P = p + q*Wi
+ * \ /
+ * \ /
+ * \ /
+ * \/
+ * /\
+ * / \
+ * / \
+ * ____ / \
+ * q --| Wi |-----------------> Q = p - q*Wi
+ * ----
+ *
+ * This routine also requires pre-computed twiddle values, W. W is an
+ * array of single precision complex numbers of length 1<<(log2_size-2)
+ * and is computed as follows:
+ *
+ * for (i=0; i<n/4; i++) {
+ * W[i].real = cos(i * 2*PI/n);
+ * W[i].imag = -sin(i * 2*PI/n);
+ * }
+ *
+ * This array actually only contains the first half of the twiddle
+ * factors. Due to symmetry, the second half of the twiddle factors
+ * are implied and equal:
+ *
+ * for (i=0; i<n/4; i++) {
+ * W[i+n/4].real = W[i].imag = -sin(i * 2*PI/n);
+ * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n);
+ * }
+ *
+ * Further symmetry allows one to generate the twiddle factor table
+ * using half the number of trig computations as follows:
+ *
+ * W[0].real = 1.0;
+ * W[0].imag = 0.0;
+ * for (i=1; i<n/4; i++) {
+ * W[i].real = cos(i * 2*PI/n);
+ * W[n/4 - i].imag = -W[i].real;
+ * }
+ *
+ * The complex numbers are packed into quadwords as follows:
+ *
+ * quadword complex
+ * array element array elements
+ * -----------------------------------------------------
+ * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 |
+ * -----------------------------------------------------
+ *
+ */
+
+void fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size);
+
+#endif /* INCLUDED_LIBFFT_H */
diff --git a/gcell/src/lib/runtime/Makefile.am b/gcell/src/lib/runtime/Makefile.am
index 3f2077c08..a68d2bcd0 100644
--- a/gcell/src/lib/runtime/Makefile.am
+++ b/gcell/src/lib/runtime/Makefile.am
@@ -20,8 +20,6 @@
include $(top_srcdir)/Makefile.common
-# SUBDIRS = spu .
-
IBM_PPU_SYNC_INCLUDES = -I$(top_srcdir)/gcell/src/ibm/sync/ppu_source
@@ -29,6 +27,8 @@ AM_CPPFLAGS = $(DEFINES) $(OMNITHREAD_INCLUDES) $(MBLOCK_INCLUDES) $(CPPUNIT_INC
$(GCELL_INCLUDES) $(IBM_PPU_SYNC_INCLUDES) $(WITH_INCLUDES)
+dist_bin_SCRIPTS = gcell-embedspu-libtool
+
noinst_LTLIBRARIES = libruntime.la libruntime-qa.la
libruntime_la_SOURCES = \
@@ -39,7 +39,7 @@ libruntime_la_SOURCES = \
gc_proc_def_utils.cc
libruntime_qa_la_SOURCES = \
- qa_lib.cc \
+ qa_gcell_runtime.cc \
qa_jd_queue.cc \
qa_jd_stack.cc \
qa_job_manager.cc
@@ -55,22 +55,14 @@ noinst_HEADERS = \
qa_jd_queue.h \
qa_jd_stack.h \
qa_job_manager.h \
- qa_lib.h
+ qa_gcell_runtime.h
-
-# This kruft is required to link the QA SPU executable into the PPE shared lib w/o warnings
-gcell_qa.lo: ../spu/gcell_qa
- ppu-embedspu -m32 -fpic gcell_qa ../spu/gcell_qa .libs/gcell_qa.o
- @rm -f gcell_qa.lo
- @echo "# gcell_qa.lo - a libtool object file" >> gcell_qa.lo
- @echo "# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06)" >> gcell_qa.lo
- @echo "#" >> gcell_qa.lo
- @echo "# Please DO NOT delete this file!" >> gcell_qa.lo
- @echo "# It is necessary for linking the library." >> gcell_qa.lo
- @echo "" >> gcell_qa.lo
- @echo "pic_object='.libs/gcell_qa.o'" >> gcell_qa.lo
- @echo "non_pic_object=none" >> gcell_qa.lo
+# generate a libtool.lo that contains an embedded SPU executable
+gcell_runtime_qa.lo: ../spu/gcell_runtime_qa
+ $(GCELL_EMBEDSPU_LIBTOOL) $@ $<
libruntime_qa_la_LIBADD = \
- gcell_qa.lo \
+ gcell_runtime_qa.lo \
libruntime.la
+
+CLEANFILES = gcell_runtime_qa.lo
diff --git a/gcell/src/lib/runtime/gc_job_manager.cc b/gcell/src/lib/runtime/gc_job_manager.cc
index 94090bedf..9ede5e156 100644
--- a/gcell/src/lib/runtime/gc_job_manager.cc
+++ b/gcell/src/lib/runtime/gc_job_manager.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -24,11 +24,17 @@
#endif
#include "gc_job_manager.h"
#include "gc_job_manager_impl.h"
+#include <boost/weak_ptr.hpp>
+#include <stdio.h>
-gc_job_manager *
+
+static boost::weak_ptr<gc_job_manager> s_singleton;
+
+
+gc_job_manager_sptr
gc_make_job_manager(const gc_jm_options *options)
{
- return new gc_job_manager_impl(options);
+ return gc_job_manager_sptr(new gc_job_manager_impl(options));
}
gc_job_manager::gc_job_manager(const gc_jm_options *options)
@@ -52,3 +58,103 @@ gc_job_manager::debug()
{
return 0;
}
+
+void
+gc_job_manager::set_singleton(gc_job_manager_sptr mgr)
+{
+ s_singleton = mgr;  // s_singleton is a boost::weak_ptr, so this does not keep mgr alive
+}
+
+gc_job_manager_sptr
+gc_job_manager::singleton()
+{
+ return gc_job_manager_sptr(s_singleton);  // promote the weak reference; header documents boost::bad_weak_ptr when it is empty
+}
+
+// ------------------------------------------------------------------------
+
+
+// custom deleter: closes an SPE image with spe_image_close (used by gc_program_handle_from_filename)
+class spe_program_handle_deleter {
+public:
+ void operator()(spe_program_handle_t *program) {
+ if (program){  // nothing to close for a null handle
+ int r = spe_image_close(program);
+ if (r != 0){  // failure is only reported on stderr, not propagated
+ perror("spe_image_close");
+ }
+ }
+ }
+};
+
+// nop custom deleter: leaves the handle untouched (used by gc_program_handle_from_address)
+class nop_spe_program_handle_deleter {
+public:
+ void operator()(spe_program_handle_t *program) {
+ }
+};
+
+spe_program_handle_sptr
+gc_program_handle_from_filename(const std::string &filename)
+{
+ return spe_program_handle_sptr(spe_image_open(filename.c_str()),  // open the SPE image named by filename
+ spe_program_handle_deleter());  // release path goes through spe_image_close
+}
+
+
+spe_program_handle_sptr
+gc_program_handle_from_address(spe_program_handle_t *handle)
+{
+ return spe_program_handle_sptr(handle, nop_spe_program_handle_deleter());  // wrap only: the nop deleter never closes the handle
+}
+
+// Map a gc_job_status_t onto its symbolic name. Values without a name are
+// rendered as "unknown gc_job_status_t (<number>)".
+const std::string
+gc_job_status_string(gc_job_status_t status)
+{
+ switch(status){
+ case JS_OK: return "JS_OK";
+ case JS_SHUTTING_DOWN: return "JS_SHUTTING_DOWN";
+ case JS_TOO_MANY_CLIENTS: return "JS_TOO_MANY_CLIENTS";
+ case JS_UNKNOWN_PROC: return "JS_UNKNOWN_PROC";
+ case JS_BAD_DIRECTION: return "JS_BAD_DIRECTION";
+ case JS_BAD_EAH: return "JS_BAD_EAH";
+ case JS_BAD_N_DIRECT: return "JS_BAD_N_DIRECT";
+ case JS_BAD_N_EA: return "JS_BAD_N_EA";
+ case JS_ARGS_TOO_LONG: return "JS_ARGS_TOO_LONG";
+ case JS_BAD_JUJU: return "JS_BAD_JUJU";
+ case JS_BAD_JOB_DESC: return "JS_BAD_JOB_DESC";
+ default:
+ {
+ // No trailing newline: like the named statuses, this text is embedded
+ // in larger messages (e.g. the gc_bad_submit exception string).
+ char buf[100];
+ snprintf(buf, sizeof(buf), "unknown gc_job_status_t (%d)", (int) status);
+ return buf;
+ }
+ }
+}
+
+/*
+ * exception classes: each subclass prefixes its own name to the message it hands to gc_exception
+ */
+
+gc_exception::gc_exception(const std::string &msg)
+ : runtime_error(msg)  // msg is passed through unchanged
+{
+}
+
+gc_unknown_proc::gc_unknown_proc(const std::string &msg)
+ : gc_exception("gc_unknown_proc: " + msg)
+{
+}
+
+gc_bad_alloc::gc_bad_alloc(const std::string &msg)
+ : gc_exception("gc_bad_alloc: " + msg)
+{
+}
+
+gc_bad_align::gc_bad_align(const std::string &msg)
+ : gc_exception("gc_bad_align: " + msg)
+{
+}
+
+gc_bad_submit::gc_bad_submit(const std::string &name, gc_job_status_t status)
+ : gc_exception("gc_bad_submit(" + name + "): " + gc_job_status_string(status))  // name plus the status rendered as text
+{
+}
diff --git a/gcell/src/lib/runtime/gc_job_manager.h b/gcell/src/lib/runtime/gc_job_manager.h
index 9c8e70bf8..aa30dc24b 100644
--- a/gcell/src/lib/runtime/gc_job_manager.h
+++ b/gcell/src/lib/runtime/gc_job_manager.h
@@ -23,17 +23,52 @@
#define INCLUDED_GC_JOB_MANAGER_H
#include <boost/utility.hpp>
+#include <boost/shared_ptr.hpp>
#include <vector>
#include <string>
+#include <stdexcept>
#include <libspe2.h>
#include "gc_job_desc.h"
class gc_job_manager;
+typedef boost::shared_ptr<gc_job_manager> gc_job_manager_sptr;
+typedef boost::shared_ptr<spe_program_handle_t> spe_program_handle_sptr;
-enum gc_wait_mode {
- GC_WAIT_ANY,
- GC_WAIT_ALL,
-};
+/*!
+ * \brief Return a boost::shared_ptr to an spe_program_handle_t
+ *
+ * \param filename is the name of the SPE ELF executable to open.
+ *
+ * Calls spe_image_open to open the file. If successful returns a
+ * boost::shared_ptr that will call spe_image_close when it's time to
+ * free the object.
+ *
+ * Returns the equivalent of the NULL pointer if the file cannot be
+ * opened, or if it's not an SPE ELF object file.
+ *
+ * \sa gc_program_handle_from_address
+ */
+spe_program_handle_sptr
+gc_program_handle_from_filename(const std::string &filename);
+
+/*!
+ * \brief Return a boost::shared_ptr to an spe_program_handle_t
+ *
+ * \param handle is a non-zero pointer to an embedded SPE image.
+ *
+ * If successful returns a boost::shared_ptr that does nothing when
+ * it's time to free the object.
+ *
+ * \sa gc_program_handle_from_filename
+ */
+spe_program_handle_sptr
+gc_program_handle_from_address(spe_program_handle_t *handle);
+
+/*!
+ * \brief map gc_job_status_t into a string
+ */
+const std::string
+gc_job_status_string(gc_job_status_t status);
/*
* \brief Options that configure the job_manager.
@@ -46,23 +81,59 @@ struct gc_jm_options {
bool gang_schedule; // shall we gang schedule?
bool use_affinity; // shall we try for affinity (FIXME not implmented)
bool enable_logging; // shall we log SPE events?
- uint32_t log2_nlog_entries; // log2 of number of log entries (default is 12 == 4k)
- spe_program_handle_t *program_handle; // program to load into SPEs
+ uint32_t log2_nlog_entries; // log2 of number of log entries (default is 12 == 4k)
+ spe_program_handle_sptr program_handle; // program to load into SPEs
gc_jm_options() :
max_jobs(0), max_client_threads(0), nspes(0),
gang_schedule(true), use_affinity(false),
- enable_logging(false), log2_nlog_entries(12),
- program_handle(0)
+ enable_logging(false), log2_nlog_entries(12)
{
}
};
+enum gc_wait_mode {
+ GC_WAIT_ANY,
+ GC_WAIT_ALL,
+};
+
+/*
+ * exception classes: all derive from gc_exception, which is itself a std::runtime_error
+ */
+class gc_exception : public std::runtime_error
+{
+public:
+ gc_exception(const std::string &msg);
+};
+
+class gc_unknown_proc : public gc_exception  // thrown by lookup_proc for an unknown procedure name
+{
+public:
+ gc_unknown_proc(const std::string &msg);
+};
+
+class gc_bad_alloc : public gc_exception  // thrown by alloc_job_desc when no descriptor is available
+{
+public:
+ gc_bad_alloc(const std::string &msg);
+};
+
+class gc_bad_align : public gc_exception
+{
+public:
+ gc_bad_align(const std::string &msg);
+};
+
+class gc_bad_submit : public gc_exception  // message is built from a name and a job status
+{
+public:
+ gc_bad_submit(const std::string &name, gc_job_status_t status);
+};
/*
* \brief Create an instance of the job manager
*/
-gc_job_manager *
+gc_job_manager_sptr
gc_make_job_manager(const gc_jm_options *options = 0);
@@ -92,7 +163,7 @@ public:
/*!
* \brief Return a pointer to a properly aligned job descriptor,
- * or zero if none are available.
+ * or throws gc_bad_alloc if there are none available.
*/
virtual gc_job_desc *alloc_job_desc() = 0;
@@ -139,6 +210,8 @@ public:
* A thread may only wait for jobs which it submitted.
*
* \returns number of jobs completed, or -1 if error.
+ * The caller must examine the status field of each job to confirm
+ * successful completion of the job.
*/
virtual int
wait_jobs(unsigned int njobs,
@@ -154,7 +227,7 @@ public:
/*!
* Return gc_proc_id_t associated with spu procedure \p proc_name if one
- * exists, otherwise return GCP_UNKNOWN_PROC.
+ * exists, otherwise throws gc_unknown_proc.
*/
virtual gc_proc_id_t lookup_proc(const std::string &proc_name) = 0;
@@ -163,6 +236,27 @@ public:
*/
virtual std::vector<std::string> proc_names() = 0;
+ /*!
+ * \brief Set the singleton gc_job_manager instance.
+ * \param mgr is the job manager instance.
+ *
+ * The singleton is weakly held, thus the caller must maintain
+ * a reference to the mgr for the duration. (If we held the
+ * manager strongly, the destructor would never be called, and the
+ * resources (SPEs) would not be returned.) Bottom line: the
+ * caller is responsible for life-time management.
+ */
+ static void set_singleton(gc_job_manager_sptr mgr);
+
+ /*!
+ * \brief Retrieve the singleton gc_job_manager instance.
+ *
+ * Returns the singleton gc_job_manager instance or raises
+ * boost::bad_weak_ptr if the singleton is empty.
+ */
+ static gc_job_manager_sptr singleton();
+
+
virtual void set_debug(int debug);
virtual int debug();
};
diff --git a/gcell/src/lib/runtime/gc_job_manager_impl.cc b/gcell/src/lib/runtime/gc_job_manager_impl.cc
index dd08154d0..59deb4ae5 100644
--- a/gcell/src/lib/runtime/gc_job_manager_impl.cc
+++ b/gcell/src/lib/runtime/gc_job_manager_impl.cc
@@ -65,19 +65,6 @@ public:
}
};
-// custom deleter
-class spe_program_handle_deleter {
-public:
- void operator()(spe_program_handle_t *program) {
- if (program){
- int r = spe_image_close(program);
- if (r != 0){
- perror("spe_image_close");
- }
- }
- }
-};
-
// custom deleter of anything that can be freed with "free"
class free_deleter {
@@ -150,7 +137,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
if (d_options.max_client_threads == 0)
d_options.max_client_threads = DEFAULT_MAX_CLIENT_THREADS;
- if (d_options.program_handle == 0){
+ if (!d_options.program_handle){
fprintf(stderr, "gc_job_manager: options->program_handle must be non-zero\n");
throw std::runtime_error("gc_job_manager: options->program_handle must be non-zero");
}
@@ -236,7 +223,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// get a handle to the spe program
- spe_program_handle_t *spe_image = d_options.program_handle;
+ spe_program_handle_t *spe_image = d_options.program_handle.get();
// fish proc_def table out of SPE ELF file
@@ -431,8 +418,12 @@ gc_job_manager_impl::bv_isclr(unsigned long *bv, unsigned int bitno)
gc_job_desc *
gc_job_manager_impl::alloc_job_desc()
{
- // stack is lock free, thus safe to call from any thread
- return gc_jd_stack_pop(d_free_list);
+ // stack is lock free, and safe to call from any thread
+ gc_job_desc *jd = gc_jd_stack_pop(d_free_list);
+ if (jd == 0)
+ throw gc_bad_alloc("alloc_job_desc: none available");
+
+ return jd;
}
void
@@ -557,7 +548,7 @@ bool
gc_job_manager_impl::wait_job(gc_job_desc *jd)
{
bool done;
- return wait_jobs(1, &jd, &done, GC_WAIT_ANY) == 1;
+ return wait_jobs(1, &jd, &done, GC_WAIT_ANY) == 1 && jd->status == JS_OK;
}
int
@@ -1246,7 +1237,7 @@ gc_job_manager_impl::lookup_proc(const std::string &proc_name)
if (proc_name == d_proc_def[i].name)
return i;
- return GCP_UNKNOWN_PROC;
+ throw gc_unknown_proc(proc_name);
}
std::vector<std::string>
diff --git a/gcell/src/lib/runtime/gc_job_manager_impl.h b/gcell/src/lib/runtime/gc_job_manager_impl.h
index 46897848c..fcc24dc0c 100644
--- a/gcell/src/lib/runtime/gc_job_manager_impl.h
+++ b/gcell/src/lib/runtime/gc_job_manager_impl.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -29,7 +29,6 @@
#include "gc_spu_args.h"
#include <libspe2.h>
#include <vector>
-#include <boost/shared_ptr.hpp>
#include <boost/scoped_array.hpp>
typedef boost::shared_ptr<spe_gang_context> spe_gang_context_sptr;
@@ -169,7 +168,7 @@ private:
void sync_logfiles();
void unmap_logfiles();
- friend gc_job_manager *gc_make_job_manager(const gc_jm_options *options);
+ friend gc_job_manager_sptr gc_make_job_manager(const gc_jm_options *options);
gc_job_manager_impl(const gc_jm_options *options = 0);
diff --git a/gcell/src/lib/runtime/gcell-embedspu-libtool b/gcell/src/lib/runtime/gcell-embedspu-libtool
new file mode 100755
index 000000000..a4ee53b7e
--- /dev/null
+++ b/gcell/src/lib/runtime/gcell-embedspu-libtool
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+if [ $# -ne 2 ]; then
+ echo "usage: gcell-embedspu-libtool file.lo spu_executable_file" 1>&2
+ exit 1
+fi
+
+lo_file=$1
+spu_executable=$2
+symbol_name=${lo_file%%.lo}
+
+# generate the .o file that wraps the SPU executable
+ppu-embedspu -m32 -fpic ${symbol_name} ${spu_executable} .libs/${symbol_name}.o
+
+# generate the .lo libtool file that points at all the right places
+rm -f $lo_file
+cat >$lo_file.new <<EOF
+# $lo_file - a libtool object file
+# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06)
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+pic_object='.libs/${symbol_name}.o'
+non_pic_object=none
+EOF
+
+mv $lo_file.new $lo_file
+
diff --git a/gcell/src/lib/runtime/qa_lib.cc b/gcell/src/lib/runtime/qa_gcell_runtime.cc
index d8a8960c6..fef9a7fb4 100644
--- a/gcell/src/lib/runtime/qa_lib.cc
+++ b/gcell/src/lib/runtime/qa_gcell_runtime.cc
@@ -25,15 +25,15 @@
* add them here.
*/
-#include <qa_lib.h>
+#include <qa_gcell_runtime.h>
#include <qa_jd_stack.h>
#include <qa_jd_queue.h>
#include <qa_job_manager.h>
CppUnit::TestSuite *
-qa_lib::suite()
+qa_gcell_runtime::suite()
{
- CppUnit::TestSuite *s = new CppUnit::TestSuite("lib");
+ CppUnit::TestSuite *s = new CppUnit::TestSuite("runtime");
s->addTest(qa_jd_stack::suite());
s->addTest(qa_jd_queue::suite());
diff --git a/gcell/src/lib/runtime/qa_lib.h b/gcell/src/lib/runtime/qa_gcell_runtime.h
index 594efcdc8..36180c919 100644
--- a/gcell/src/lib/runtime/qa_lib.h
+++ b/gcell/src/lib/runtime/qa_gcell_runtime.h
@@ -18,18 +18,18 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-#ifndef INCLUDED_QA_LIB_H
-#define INCLUDED_QA_LIB_H
+#ifndef INCLUDED_QA_GCELL_RUNTIME_H
+#define INCLUDED_QA_GCELL_RUNTIME_H
#include <cppunit/TestSuite.h>
-//! collect all the tests for the lib directory
+//! collect all the tests for the runtime directory
-class qa_lib {
+class qa_gcell_runtime {
public:
//! return suite of tests
static CppUnit::TestSuite *suite();
};
-#endif /* INCLUDED_QA_LIB_H */
+#endif /* INCLUDED_QA_GCELL_RUNTIME_H */
diff --git a/gcell/src/lib/runtime/qa_job_manager.cc b/gcell/src/lib/runtime/qa_job_manager.cc
index 3f2780c52..53a1ec681 100644
--- a/gcell/src/lib/runtime/qa_job_manager.cc
+++ b/gcell/src/lib/runtime/qa_job_manager.cc
@@ -29,7 +29,8 @@
#include <malloc.h>
-extern spe_program_handle_t gcell_qa; // handle to embedded SPU executable w/ QA routines
+// handle to embedded SPU executable w/ QA routines
+extern spe_program_handle_t gcell_runtime_qa;
#if 0
static void
@@ -173,23 +174,21 @@ qa_job_manager::t15()
void
qa_job_manager::t1_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
mgr = gc_make_job_manager(&opts);
- delete mgr;
}
void
qa_job_manager::t2_body()
{
- gc_job_manager *mgr = 0;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 100;
opts.gang_schedule = false;
mgr = gc_make_job_manager(&opts);
- delete mgr;
}
void
@@ -200,13 +199,12 @@ qa_job_manager::t3_body()
// cppunit. cppunit is the prime suspect.
#if 0
- gc_job_manager *mgr = 0;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 100;
opts.gang_schedule = true;
CPPUNIT_ASSERT_THROW(mgr = gc_make_job_manager(&opts), std::out_of_range);
- delete mgr;
#endif
}
@@ -222,9 +220,9 @@ init_jd(gc_job_desc *jd, gc_proc_id_t proc_id)
void
qa_job_manager::t4_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
//mgr->set_debug(-1);
@@ -232,8 +230,8 @@ qa_job_manager::t4_body()
gc_job_desc *jds[NJOBS];
bool done[NJOBS];
- gc_proc_id_t gcp_no_such = mgr->lookup_proc("--no-such-proc-name--");
- CPPUNIT_ASSERT_EQUAL(GCP_UNKNOWN_PROC, gcp_no_such);
+ gc_proc_id_t gcp_no_such;
+ CPPUNIT_ASSERT_THROW(gcp_no_such = mgr->lookup_proc("--no-such-proc-name--"), gc_unknown_proc);
gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop");
CPPUNIT_ASSERT(gcp_qa_nop != GCP_UNKNOWN_PROC);
@@ -256,16 +254,14 @@ qa_job_manager::t4_body()
for (int i = 0; i < NJOBS; i++){
mgr->free_job_desc(jds[i]);
}
-
- delete mgr;
}
void
qa_job_manager::t5_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 0; // use them all
mgr = gc_make_job_manager(&opts);
//mgr->set_debug(-1);
@@ -293,16 +289,14 @@ qa_job_manager::t5_body()
for (int i = 0; i < NJOBS; i++){
mgr->free_job_desc(jds[i]);
}
-
- delete mgr;
}
void
qa_job_manager::t6_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop");
@@ -330,7 +324,6 @@ qa_job_manager::t6_body()
}
mgr->free_job_desc(jd);
- delete mgr;
}
static int
@@ -344,7 +337,7 @@ sum_shorts(short *p, int nshorts)
}
static void
-test_sum_shorts(gc_job_manager *mgr, short *buf, int nshorts)
+test_sum_shorts(gc_job_manager_sptr mgr, short *buf, int nshorts)
{
gc_job_desc *jd = mgr->alloc_job_desc();
gc_proc_id_t gcp_qa_sum_shorts = mgr->lookup_proc("qa_sum_shorts");
@@ -379,9 +372,9 @@ static short short_buf[NS] _AL128; // for known alignment
void
qa_job_manager::t7_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
@@ -400,8 +393,6 @@ qa_job_manager::t7_body()
for (int offset = 0; offset <= 64; offset++){
test_sum_shorts(mgr, &short_buf[offset], ea_args_maxsize/sizeof(short));
}
-
- delete mgr;
}
//
@@ -410,9 +401,9 @@ qa_job_manager::t7_body()
void
qa_job_manager::t8_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
gc_job_desc *jd = mgr->alloc_job_desc();
@@ -433,7 +424,6 @@ qa_job_manager::t8_body()
}
mgr->free_job_desc(jd);
- delete mgr;
}
//
@@ -444,9 +434,9 @@ qa_job_manager::t9_body()
{
static const int N = 127;
static const int M = 201;
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
gc_job_desc *jd = mgr->alloc_job_desc();
@@ -474,7 +464,6 @@ qa_job_manager::t9_body()
}
mgr->free_job_desc(jd);
- delete mgr;
}
static bool
@@ -510,7 +499,7 @@ confirm_seq(const unsigned char *buf, size_t len, unsigned char v)
}
static void
-test_put_seq(gc_job_manager *mgr, int offset, int len, int starting_val)
+test_put_seq(gc_job_manager_sptr mgr, int offset, int len, int starting_val)
{
gc_job_desc *jd = mgr->alloc_job_desc();
gc_proc_id_t gcp_qa_put_seq = mgr->lookup_proc("qa_put_seq");
@@ -556,9 +545,9 @@ test_put_seq(gc_job_manager *mgr, int offset, int len, int starting_val)
void
qa_job_manager::t10_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
@@ -576,8 +565,6 @@ qa_job_manager::t10_body()
for (int offset = 0; offset <= 64; offset++){
test_put_seq(mgr, offset, ea_args_maxsize, starting_val++);
}
-
- delete mgr;
}
//
@@ -586,9 +573,9 @@ qa_job_manager::t10_body()
void
qa_job_manager::t11_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
gc_job_desc *jd = mgr->alloc_job_desc();
@@ -611,7 +598,6 @@ qa_job_manager::t11_body()
}
mgr->free_job_desc(jd);
- delete mgr;
}
//
@@ -622,9 +608,9 @@ qa_job_manager::t12_body()
{
static const int N = 127;
static const int M = 201;
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
gc_job_desc *jd = mgr->alloc_job_desc();
@@ -662,7 +648,6 @@ qa_job_manager::t12_body()
}
mgr->free_job_desc(jd);
- delete mgr;
}
//
@@ -671,9 +656,9 @@ qa_job_manager::t12_body()
void
qa_job_manager::t13_body()
{
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
@@ -720,8 +705,6 @@ qa_job_manager::t13_body()
CPPUNIT_ASSERT(ok);
}
mgr->free_job_desc(jd);
-
- delete mgr;
}
/*
@@ -743,9 +726,9 @@ qa_job_manager::t14_body()
memset(buf, 0xff, LEN_PER_JOB * NJOBS);
- gc_job_manager *mgr;
+ gc_job_manager_sptr mgr;
gc_jm_options opts;
- opts.program_handle = &gcell_qa;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
opts.nspes = 1;
mgr = gc_make_job_manager(&opts);
@@ -788,11 +771,19 @@ qa_job_manager::t14_body()
// cleanup
for (int i = 0; i < NJOBS; i++)
mgr->free_job_desc(jd[i]);
-
- delete mgr;
}
void
qa_job_manager::t15_body()
{
+ gc_jm_options opts;
+ opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa);
+ opts.nspes = 1;
+ gc_job_manager_sptr mgr = gc_make_job_manager(&opts);
+
+ gc_job_manager::set_singleton(mgr);  // the singleton only holds mgr weakly
+
+ CPPUNIT_ASSERT(gc_job_manager::singleton());  // retrievable while mgr is still alive
+ mgr.reset();  // drop our strong reference to the manager
+ CPPUNIT_ASSERT_THROW(gc_job_manager::singleton(), boost::bad_weak_ptr);  // the singleton must now be empty
}
diff --git a/gcell/src/lib/runtime/spu/gc_spu_config.h b/gcell/src/lib/runtime/spu/gc_spu_config.h
index 997645e68..6eff71060 100644
--- a/gcell/src/lib/runtime/spu/gc_spu_config.h
+++ b/gcell/src/lib/runtime/spu/gc_spu_config.h
@@ -24,7 +24,7 @@
#include <gc_job_desc.h>
#define CACHE_LINE_SIZE 128 // in bytes
-#define GC_SPU_BUFSIZE_BASE (32 * 1024) // must be multiple of CACHE_LINE_SIZE
+#define GC_SPU_BUFSIZE_BASE (40 * 1024) // must be multiple of CACHE_LINE_SIZE
#define GC_SPU_BUFSIZE (GC_SPU_BUFSIZE_BASE + MAX_ARGS_EA * CACHE_LINE_SIZE)
#define NGETBUFS 1 // single buffer job arg gets
diff --git a/gcell/src/lib/runtime/spu/gcell_qa.c b/gcell/src/lib/runtime/spu/gcell_runtime_qa.c
index 51bf38a6a..51bf38a6a 100644
--- a/gcell/src/lib/runtime/spu/gcell_qa.c
+++ b/gcell/src/lib/runtime/spu/gcell_runtime_qa.c
diff --git a/gcell/src/lib/spu/Makefile.am b/gcell/src/lib/spu/Makefile.am
index fac057cd5..3c96d8f30 100644
--- a/gcell/src/lib/spu/Makefile.am
+++ b/gcell/src/lib/spu/Makefile.am
@@ -57,23 +57,28 @@ runtime_spu_noinst_headers = \
general_srcdir = $(srcdir)/../general/spu
-general_spu_sources =
+general_spu_sources = \
+ $(general_srcdir)/fft_1d_r2.c
-general_spu_headers =
+general_spu_headers = \
+ $(general_srcdir)/libfft.h
-general_spu_noinst_headers =
+general_spu_noinst_headers = \
+ $(general_srcdir)/fft_1d.h \
+ $(general_srcdir)/fft_1d_r2.h
# ----------------------------------------------------------------
-# files in the lib/procs/spu directory
+# files in the lib/wrapper/spu directory
-procs_srcdir = $(srcdir)/../proc/spu
+wrapper_srcdir = $(srcdir)/../wrapper/spu
-procs_spu_sources =
+wrapper_spu_sources = \
+ $(wrapper_srcdir)/gcs_fft_1d_r2.c
-procs_spu_headers =
+wrapper_spu_headers =
-procs_spu_noinst_headers =
+wrapper_spu_noinst_headers =
# ----------------------------------------------------------------
# build the library from the files in the three directories
@@ -81,23 +86,29 @@ procs_spu_noinst_headers =
libgcell_spu_a_SOURCES = \
$(runtime_spu_sources) \
$(general_spu_sources) \
- $(procs_spu_sources)
+ $(wrapper_spu_sources)
gcellspuinclude_HEADERS = \
$(runtime_spu_headers) \
$(general_spu_headers) \
- $(procs_spu_headers)
+ $(wrapper_spu_headers)
noinst_HEADERS = \
$(runtime_spu_noinst_headers) \
$(general_spu_noinst_headers) \
- $(procs_spu_noinst_headers)
+ $(wrapper_spu_noinst_headers)
# ----------------------------------------------------------------
-# SPU executable containing QA code
+# SPU executables
noinst_PROGRAMS = \
- gcell_qa
+ gcell_all \
+ gcell_runtime_qa
-gcell_qa_SOURCES = $(runtime_srcdir)/gcell_qa.c
-gcell_qa_LDADD = libgcell_spu.a
+# all known gcell procs (at least until they get too big)
+gcell_all_SOURCES = $(wrapper_spu_sources)
+gcell_all_LDADD = libgcell_spu.a
+
+# just the QA code required for testing the runtime
+gcell_runtime_qa_SOURCES = $(runtime_srcdir)/gcell_runtime_qa.c
+gcell_runtime_qa_LDADD = libgcell_spu.a
diff --git a/gcell/src/lib/procs/Makefile.am b/gcell/src/lib/wrapper/Makefile.am
index 0e32ffc37..03ffa54b3 100644
--- a/gcell/src/lib/procs/Makefile.am
+++ b/gcell/src/lib/wrapper/Makefile.am
@@ -20,5 +20,31 @@
include $(top_srcdir)/Makefile.common
-# SUBDIRS = spu .
+AM_CPPFLAGS = $(DEFINES) $(GCELL_INCLUDES) $(FFTW3F_CFLAGS) $(WITH_INCLUDES)
+noinst_LTLIBRARIES = libwrapper.la libwrapper-qa.la
+
+# generate a libtool.lo that contains an embedded SPU executable
+gcell_all.lo: ../spu/gcell_all
+ $(GCELL_EMBEDSPU_LIBTOOL) $@ $<
+
+libwrapper_la_SOURCES = \
+ gcp_fft_1d_r2.cc
+
+libwrapper_la_LIBADD = \
+ gcell_all.lo
+
+libwrapper_qa_la_SOURCES = \
+ qa_gcell_wrapper.cc \
+ qa_gcp_fft_1d_r2.cc
+
+libwrapper_qa_la_LIBADD = \
+ -lfftw3f
+
+gcellinclude_HEADERS = \
+ gcp_fft_1d_r2.h
+
+noinst_HEADERS = \
+ qa_gcell_wrapper.h
+
+CLEANFILES = gcell_all.lo
diff --git a/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc b/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc
new file mode 100644
index 000000000..d639dad45
--- /dev/null
+++ b/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc
@@ -0,0 +1,116 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <gcp_fft_1d_r2.h>
+#include <stdint.h>
+#include <stdexcept>
+#include <math.h>
+
+static void
+init_jd(gc_job_desc *jd,
+        gc_proc_id_t proc_id,
+        unsigned log2_fft_length,
+        bool forward,
+        std::complex<float> *out,
+        const std::complex<float> *in,
+        const std::complex<float> *W)
+{
+  // Describe one FFT job in *jd: 2 direct inputs, 0 direct outputs, 3 eaa (DMA) buffers.
+  unsigned int fft_length = 1 << log2_fft_length;
+  jd->proc_id = proc_id;
+  // direct arguments: the transform size (as log2) and the direction flag
+  jd->input.nargs = 2;
+  jd->input.arg[0].u32 = log2_fft_length;
+  jd->input.arg[1].u32 = forward;
+  jd->output.nargs = 0;
+
+  // eaa arguments: [0] = out (PUT), [1] = in (GET), [2] = W (GET)
+  jd->eaa.nargs = 3;
+  jd->eaa.arg[0].ea_addr = ptr_to_ea(out);
+  jd->eaa.arg[1].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(in));
+  jd->eaa.arg[2].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(W));
+  jd->eaa.arg[0].direction = GCJD_DMA_PUT;
+  jd->eaa.arg[1].direction = GCJD_DMA_GET;
+  jd->eaa.arg[2].direction = GCJD_DMA_GET;
+  jd->eaa.arg[0].put_size = sizeof(*out) * fft_length;
+  jd->eaa.arg[1].get_size = sizeof(*in) * fft_length;
+  jd->eaa.arg[2].get_size = sizeof(*W) * fft_length / 4;  // W holds fft_length/4 entries
+}
+
+
+gc_job_desc *
+gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
+                     unsigned int log2_fft_length,
+                     bool forward,
+                     std::complex<float> *out,
+                     const std::complex<float> *in,
+                     const std::complex<float> *W)
+{
+  // Test the exponent itself, not 1 << exponent: an over-wide shift is undefined.
+  if (log2_fft_length > 12)
+    throw std::invalid_argument("fft_length > 4096");
+  // out, in and W must each sit on a 16-byte boundary, else gc_bad_align is thrown
+  if ((intptr_t)out & 0xf)
+    throw gc_bad_align("out");
+  if ((intptr_t)in & 0xf)
+    throw gc_bad_align("in");
+  if ((intptr_t)W & 0xf)
+    throw gc_bad_align("W");
+  // fill in a descriptor and submit it; on failure free it before throwing gc_bad_submit
+  gc_proc_id_t fft_id = mgr->lookup_proc("fft_1d_r2");
+  gc_job_desc *jd = mgr->alloc_job_desc();
+  init_jd(jd, fft_id, log2_fft_length, forward, out, in, W);
+  if (!mgr->submit_job(jd)){
+    gc_job_status_t s = jd->status;
+    mgr->free_job_desc(jd);
+    throw gc_bad_submit("fft_1d_r2", s);
+  }
+  return jd;   // the caller passes this to wait_job and then frees it
+}
+
+void
+gcp_fft_1d_r2_forward_twiddle(unsigned int log2_fft_length, std::complex<float> *W)
+{
+  // W[k] = cos(2*pi*k/n) - j*sin(2*pi*k/n) for k in [0, n/4), n = 2^log2_fft_length.
+  // Assign whole elements: since C++11 real()/imag() return by value, not lvalues.
+  unsigned int n = 1 << log2_fft_length;
+  W[0] = std::complex<float>(1.0f, 0.0f);
+  for (unsigned i=1; i < n/4; i++){
+    // sin(2*pi*i/n) == cos(2*pi*(n/4 - i)/n), so only cos() is evaluated
+    W[i] = std::complex<float>(cos(i * 2*M_PI/n), -cos((n/4 - i) * 2*M_PI/n));
+  }
+}
+
+
+void
+gcp_fft_1d_r2_reverse_twiddle(unsigned int log2_fft_length, std::complex<float> *W)
+{
+  // FIXME this is wrong/insufficient.  inverse is still incorrect
+
+  // Build the forward table first, then walk its n/4 entries and replace
+  // each one with its complex conjugate (n = 2^log2_fft_length).
+  gcp_fft_1d_r2_forward_twiddle(log2_fft_length, W);
+  unsigned int n = 1 << log2_fft_length;
+  for (std::complex<float> *w = W, *end = W + n/4; w != end; ++w)
+    *w = conj(*w);
+}
diff --git a/gcell/src/lib/wrapper/gcp_fft_1d_r2.h b/gcell/src/lib/wrapper/gcp_fft_1d_r2.h
new file mode 100644
index 000000000..be1440fd4
--- /dev/null
+++ b/gcell/src/lib/wrapper/gcp_fft_1d_r2.h
@@ -0,0 +1,66 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCP_FFT_1D_R2_H
+#define INCLUDED_GCP_FFT_1D_R2_H
+
+#include <gc_job_manager.h>
+#include <complex>
+
+/*!
+ * \brief Submit a job that computes the forward or reverse FFT.
+ *
+ * \param mgr is the job manager instance
+ * \param log2_fft_length is the log2 of the fft_length (4 <= x <= 12).
+ * \param forward is true to compute the forward xform
+ * \param out is the fft_length output from FFT (must be 16-byte aligned).
+ * \param in is the fft_length input to FFT (must be 16-byte aligned).
+ * \param W is fft_length/4 twiddle factor input to FFT (must be 16-byte aligned).
+ *
+ * Returns a job descriptor which should be passed to wait_job*.
+ * Throws an exception in the event of a problem.
+ */
+gc_job_desc *
+gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
+ unsigned int log2_fft_length,
+ bool forward,
+ std::complex<float> *out,
+ const std::complex<float> *in,
+ const std::complex<float> *W);
+
+/*!
+ * \brief Compute twiddle factors for forward transform.
+ *
+ * \param log2_fft_length is the log2 of the fft_length.
+ * \param W is fft_length/4 twiddle factor output (must be 16-byte aligned).
+ */
+void
+gcp_fft_1d_r2_forward_twiddle(unsigned int log2_fft_length, std::complex<float> *W);
+
+/*!
+ * \brief Compute twiddle factors for reverse transform.
+ *
+ * \param log2_fft_length is the log2 of the fft_length.
+ * \param W is fft_length/4 twiddle factor output (must be 16-byte aligned).
+ */
+void
+gcp_fft_1d_r2_reverse_twiddle(unsigned int log2_fft_length, std::complex<float> *W);
+
+#endif /* INCLUDED_GCP_FFT_1D_R2_H */
diff --git a/gcell/src/lib/wrapper/qa_gcell_wrapper.cc b/gcell/src/lib/wrapper/qa_gcell_wrapper.cc
new file mode 100644
index 000000000..029dfbc58
--- /dev/null
+++ b/gcell/src/lib/wrapper/qa_gcell_wrapper.cc
@@ -0,0 +1,39 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * This class gathers together all the test cases for the wrapper
+ * directory into a single test suite. As you create new test cases,
+ * add them here.
+ */
+
+#include <qa_gcell_wrapper.h>
+#include <qa_gcp_fft_1d_r2.h>
+
+CppUnit::TestSuite *
+qa_gcell_wrapper::suite()
+{
+  // Build the top-level "wrapper" suite and hang each per-file suite off it.
+  // The caller receives the raw pointer allocated here.
+  CppUnit::TestSuite *wrapper_tests = new CppUnit::TestSuite("wrapper");
+  wrapper_tests->addTest(qa_gcp_fft_1d_r2::suite());
+  return wrapper_tests;
+}
diff --git a/gcell/src/lib/wrapper/qa_gcell_wrapper.h b/gcell/src/lib/wrapper/qa_gcell_wrapper.h
new file mode 100644
index 000000000..cb29db883
--- /dev/null
+++ b/gcell/src/lib/wrapper/qa_gcell_wrapper.h
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_GCELL_WRAPPER_H
+#define INCLUDED_QA_GCELL_WRAPPER_H
+
+#include <cppunit/TestSuite.h>
+
+//! collect all the tests for the wrapper directory
+//! (currently just the qa_gcp_fft_1d_r2 suite)
+class qa_gcell_wrapper {
+public:
+ //! return a newly allocated suite named "wrapper" holding those tests
+ static CppUnit::TestSuite *suite();
+};
+
+
+#endif /* INCLUDED_QA_GCELL_WRAPPER_H */
diff --git a/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc
new file mode 100644
index 000000000..1bb676ac2
--- /dev/null
+++ b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc
@@ -0,0 +1,211 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "qa_gcp_fft_1d_r2.h"
+#include <cppunit/TestAssert.h>
+#include <gcp_fft_1d_r2.h>
+#include <fftw3.h>
+#include <stdio.h>
+#include <stdlib.h> // random, posix_memalign
+#include <algorithm>
+
+typedef boost::shared_ptr<void> void_sptr;
+
+// handle to embedded SPU executable
+extern spe_program_handle_t gcell_all;
+
+/*
+ * Return size zero-filled bytes aligned to alignment (default 128).  Free with
+ * "free" when done.  Throws std::runtime_error if the allocation fails.
+ */
+static void *
+aligned_alloc(size_t size, size_t alignment = 128)
+{
+  void *p = 0;
+  int rc = posix_memalign(&p, alignment, size);  // returns the error number; errno is not set
+  if (rc != 0){
+    fprintf(stderr, "posix_memalign: %s\n", strerror(rc));
+    throw std::runtime_error("memory");
+  }
+  memset(p, 0, size);		// zero the memory
+  return p;
+}
+
+// Deleter functor: hands the pointer it is given to free().
+struct free_deleter {
+  void operator()(void *ptr) {
+    free(ptr);
+  }
+};
+
+static boost::shared_ptr<void>
+aligned_alloc_sptr(size_t size, size_t alignment = 128)
+{ // shared-ownership form of aligned_alloc(): free_deleter releases the storage
+  return void_sptr(aligned_alloc(size, alignment), free_deleter());
+}
+
+// Forward FFT: run test() for every power-of-two size from 2^5 through 2^12.
+void
+qa_gcp_fft_1d_r2::t1()
+{
+  // one job manager with nspes = 1, loaded with the embedded gcell_all executable
+  gc_jm_options jm_opts;
+  jm_opts.nspes = 1;
+  jm_opts.program_handle = gc_program_handle_from_address(&gcell_all);
+  gc_job_manager_sptr mgr = gc_make_job_manager(&jm_opts);
+#if 1
+  for (int k = 5; k <= 12; ++k)
+    test(mgr, k, true);
+  // (the #else arm below checks only the smallest size)
+#else
+  test(mgr, 5, true);
+#endif
+}
+
+// Reverse FFT: run test() for every power-of-two size from 2^5 through 2^12.
+void
+qa_gcp_fft_1d_r2::t2()
+{
+  // one job manager with nspes = 1, loaded with the embedded gcell_all executable
+  gc_jm_options jm_opts;
+  jm_opts.nspes = 1;
+  jm_opts.program_handle = gc_program_handle_from_address(&gcell_all);
+  gc_job_manager_sptr mgr = gc_make_job_manager(&jm_opts);
+#if 1
+  for (int k = 5; k <= 12; ++k)
+    test(mgr, k, false);
+  // (the #else arm below checks only the smallest size)
+#else
+  test(mgr, 5, false);
+#endif
+}
+
+void
+qa_gcp_fft_1d_r2::t3()
+{ // empty placeholder: performs no checks
+}
+
+void
+qa_gcp_fft_1d_r2::t4()
+{ // empty placeholder: performs no checks
+}
+
+static inline float
+abs_diff(std::complex<float> x, std::complex<float> y)
+{
+  float re_gap = std::abs(x.real() - y.real());   // gap between the real parts
+  return std::max(re_gap, std::abs(x.imag() - y.imag()));
+}
+
+static float
+float_abs_rel_error(float ref, float actual)
+{
+  // |(ref - actual) / ref|; a ref nearer to zero than 1e-18 is replaced by 1e-18
+  const float delta = ref - actual;
+  const float denom = (std::abs(ref) < 1e-18) ? 1e-18 : ref;
+  return std::abs(delta/denom);
+}
+
+static float
+abs_rel_error(std::complex<float> ref, std::complex<float> actual)
+{
+  float worst = float_abs_rel_error(ref.real(), actual.real());   // real part first
+  return std::max(worst, float_abs_rel_error(ref.imag(), actual.imag()));
+}
+
+void
+qa_gcp_fft_1d_r2::test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward)
+{
+  // Run one 2^log2_fft_size point transform through FFTW and through the cell
+  // job on identical random input, then print the largest relative error.
+  int fft_size = 1 << log2_fft_size;
+
+  // allocate aligned buffers with boost shared_ptr's
+  void_sptr fftw_in_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128);
+  void_sptr fftw_out_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128);
+  void_sptr cell_in_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128);
+  void_sptr cell_out_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128);
+  void_sptr cell_twiddle_void = aligned_alloc_sptr(fft_size/4 * sizeof(std::complex<float>), 128);
+
+  // cast them to the type we really want
+  std::complex<float> *fftw_in = (std::complex<float> *) fftw_in_void.get();
+  std::complex<float> *fftw_out = (std::complex<float> *) fftw_out_void.get();
+  std::complex<float> *cell_in = (std::complex<float> *) cell_in_void.get();
+  std::complex<float> *cell_out = (std::complex<float> *) cell_out_void.get();
+  std::complex<float> *cell_twiddle = (std::complex<float> *) cell_twiddle_void.get();
+  // twiddle table matching the requested direction
+  if (forward)
+    gcp_fft_1d_r2_forward_twiddle(log2_fft_size, cell_twiddle);
+  else
+    gcp_fft_1d_r2_reverse_twiddle(log2_fft_size, cell_twiddle);
+
+  srandom(1);		// we want reproducibility
+  // initialize the input buffers (both paths get the same 20-bit random values)
+  for (int i = 0; i < fft_size; i++){
+    std::complex<float> t((float) (random() & 0xfffff), (float) (random() & 0xfffff));
+    fftw_in[i] = t;
+    cell_in[i] = t;
+  }
+
+  // ------------------------------------------------------------------------
+  // compute the reference answer
+  fftwf_plan plan = fftwf_plan_dft_1d (fft_size,
+                                       reinterpret_cast<fftwf_complex *>(fftw_in),
+                                       reinterpret_cast<fftwf_complex *>(fftw_out),
+                                       forward ? FFTW_FORWARD : FFTW_BACKWARD,
+                                       FFTW_ESTIMATE);
+  if (plan == 0){
+    fprintf(stderr, "qa_gcp_fft_1d_r2: error creating FFTW plan\n");
+    throw std::runtime_error ("fftwf_plan_dft_1d failed");   // name the call actually made
+  }
+
+  fftwf_execute(plan);
+  fftwf_destroy_plan(plan);
+
+  // ------------------------------------------------------------------------
+  // compute the answer on the cell
+  gc_job_desc *jd = gcp_fft_1d_r2_submit(mgr, log2_fft_size, forward,
+                                         cell_out, cell_in, cell_twiddle);
+  if (!mgr->wait_job(jd)){
+    fprintf(stderr, "wait_job failed: %s\n", gc_job_status_string(jd->status).c_str());
+    mgr->free_job_desc(jd);   // release the descriptor before failing the test
+    CPPUNIT_ASSERT(0);
+  }
+  mgr->free_job_desc(jd);
+
+  // ------------------------------------------------------------------------
+  // compute the maximum of the relative error
+  float max_rel = 0.0;
+  for (int i = 0; i < fft_size; i++){
+    max_rel = std::max(max_rel, abs_rel_error(fftw_out[i], cell_out[i]));
+    if (0)   // per-bin debug dump, compiled in but disabled
+      printf("(%16.3f, %16.3fj)  (%16.3f, %16.3fj)  (%16.3f, %16.3fj)\n",
+             fftw_out[i].real(), fftw_out[i].imag(),
+             cell_out[i].real(), cell_out[i].imag(),
+             fftw_out[i].real() - cell_out[i].real(),
+             fftw_out[i].imag() - cell_out[i].imag());
+  }
+
+  fprintf(stdout, "%s fft_size = %4d  max_rel_error = %e\n",
+          forward ? "fwd" : "rev", fft_size, max_rel);
+  // NOTE: the accuracy check below is disabled, so the error is reported, not enforced
+  // CPPUNIT_ASSERT(max_rel <= 1e-4);
+}
diff --git a/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h
new file mode 100644
index 000000000..38beafb21
--- /dev/null
+++ b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_QA_GCP_FFT_1D_R2_H
+#define INCLUDED_QA_GCP_FFT_1D_R2_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+#include <gc_job_manager.h>
+
+class qa_gcp_fft_1d_r2 : public CppUnit::TestCase {
+ // CppUnit test case for gcp_fft_1d_r2; the macros below list t1..t4 as its tests
+ CPPUNIT_TEST_SUITE(qa_gcp_fft_1d_r2);
+ CPPUNIT_TEST(t1);
+ CPPUNIT_TEST(t2);
+ CPPUNIT_TEST(t3);
+ CPPUNIT_TEST(t4);
+ CPPUNIT_TEST_SUITE_END();
+
+ private:
+ void t1(); // forward FFT
+ void t2(); // reverse FFT
+ void t3(); // empty in the .cc file
+ void t4(); // empty in the .cc file
+ // shared driver: one transform of 2^log2_fft_size points in the given direction
+ void test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward);
+};
+
+
+
+#endif /* INCLUDED_QA_GCP_FFT_1D_R2_H */
diff --git a/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c b/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c
new file mode 100644
index 000000000..36bd878ed
--- /dev/null
+++ b/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c
@@ -0,0 +1,39 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gc_declare_proc.h>
+#include <libfft.h>
+
+static void
+gcs_fft_1d_r2(const gc_job_direct_args_t *input,
+ gc_job_direct_args_t *output __attribute__((unused)),
+ const gc_job_ea_args_t *eaa)
+{ // job body: run fft_1d_r2 on the three buffers named by the eaa arguments
+ vector float *out = (vector float *) eaa->arg[0].ls_addr; // result buffer
+ vector float *in = (vector float *) eaa->arg[1].ls_addr; // samples to transform
+ vector float *W = (vector float *) eaa->arg[2].ls_addr; // twiddle factors
+ int log2_fft_length = input->arg[0].u32;
+ int forward = input->arg[1].u32; // non-zero if forward xform (FIXME use)
+ // NOTE: forward is read but never used, so both directions make the same call
+ fft_1d_r2(out, in, W, log2_fft_length);
+}
+// NOTE(review): presumably registers the function under the name "fft_1d_r2" -- confirm in gc_declare_proc.h
+GC_DECLARE_PROC(gcs_fft_1d_r2, "fft_1d_r2");