diff options
author | eb | 2008-04-16 03:48:33 +0000 |
---|---|---|
committer | eb | 2008-04-16 03:48:33 +0000 |
commit | 8f2a5f3837da61a4d49251ee65f91f0d2e8e48de (patch) | |
tree | 750df8032c31b412a413ce749ec30d9363ac56f5 /gcell/src | |
parent | 6d234892030754c0cd058ad85d2c3759b0538c90 (diff) | |
download | gnuradio-8f2a5f3837da61a4d49251ee65f91f0d2e8e48de.tar.gz gnuradio-8f2a5f3837da61a4d49251ee65f91f0d2e8e48de.tar.bz2 gnuradio-8f2a5f3837da61a4d49251ee65f91f0d2e8e48de.zip |
Merged gcell-wip -r8159:8202 into trunk. This includes the following
changes:
* gc_make_job_manager now returns a boost::shared_ptr
* opts.program_handle is now a boost::shared_ptr
* two new functions for getting a program handle
* look_proc and alloc_job_desc now throw on error
* static methods for setting and getting a single job manager
* new exception hierarchy
* mv gcell/src/lib/procs gcell/src/lib/wrapper
* added libfft. Currently inverse xform is broken
* gcell-embedspu-libtool creates libtool complaint .ko's from SPE executables
git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@8209 221aa14e-8319-0410-a670-987f0aec2ac5
Diffstat (limited to 'gcell/src')
29 files changed, 1717 insertions, 146 deletions
diff --git a/gcell/src/apps/benchmark_dma.cc b/gcell/src/apps/benchmark_dma.cc index b0af8b74b..961876ad8 100644 --- a/gcell/src/apps/benchmark_dma.cc +++ b/gcell/src/apps/benchmark_dma.cc @@ -121,11 +121,11 @@ run_test(unsigned int nspes, unsigned int usecs, unsigned int dma_size, int getp } gc_jm_options opts; - opts.program_handle = &benchmark_procs; + opts.program_handle = gc_program_handle_from_address(&benchmark_procs); opts.nspes = nspes; //opts.enable_logging = true; //opts.log2_nlog_entries = 13; - gc_job_manager *mgr = gc_make_job_manager(&opts); + gc_job_manager_sptr mgr = gc_make_job_manager(&opts); if ((gcp_benchmark_udelay = mgr->lookup_proc("benchmark_udelay")) == GCP_UNKNOWN_PROC){ fprintf(stderr, "lookup_proc: failed to find \"benchmark_udelay\"\n"); @@ -211,8 +211,6 @@ run_test(unsigned int nspes, unsigned int usecs, unsigned int dma_size, int getp (double) njobs * dma_size / delta * (getput_mask == BENCHMARK_GET_PUT ? 2.0 : 1.0)); } - - delete mgr; } static void diff --git a/gcell/src/apps/benchmark_nop.cc b/gcell/src/apps/benchmark_nop.cc index 2d3611fd9..b87137d50 100644 --- a/gcell/src/apps/benchmark_nop.cc +++ b/gcell/src/apps/benchmark_nop.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2007 Free Software Foundation, Inc. + * Copyright 2007,2008 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -57,10 +57,10 @@ run_test(unsigned int nspes, unsigned int usecs, int njobs) bool done[NJDS]; gc_jm_options opts; - opts.program_handle = &benchmark_procs; + opts.program_handle = gc_program_handle_from_address(&benchmark_procs); opts.nspes = nspes; opts.gang_schedule = true; - gc_job_manager *mgr = gc_make_job_manager(&opts); + gc_job_manager_sptr mgr = gc_make_job_manager(&opts); if ((gcp_benchmark_udelay = mgr->lookup_proc("benchmark_udelay")) == GCP_UNKNOWN_PROC){ fprintf(stderr, "lookup_proc: failed to find \"benchmark_udelay\"\n"); @@ -127,8 +127,6 @@ run_test(unsigned int nspes, unsigned int usecs, int njobs) printf("nspes: %2d udelay: %4d elapsed_time: %7.3f njobs: %g speedup: %6.3f\n", mgr->nspes(), usecs, delta, (double) njobs, njobs * usecs * 1e-6 / delta); - - delete mgr; } int diff --git a/gcell/src/apps/test_all.cc b/gcell/src/apps/test_all.cc index e652de21d..798549be1 100644 --- a/gcell/src/apps/test_all.cc +++ b/gcell/src/apps/test_all.cc @@ -21,8 +21,8 @@ #include <cppunit/TextTestRunner.h> -#include <qa_lib.h> - +#include <qa_gcell_runtime.h> +#include <qa_gcell_wrapper.h> int main(int argc, char **argv) @@ -30,7 +30,8 @@ main(int argc, char **argv) CppUnit::TextTestRunner runner; - runner.addTest(qa_lib::suite()); + runner.addTest(qa_gcell_runtime::suite()); + runner.addTest(qa_gcell_wrapper::suite()); bool was_successful = runner.run("", false); diff --git a/gcell/src/lib/Makefile.am b/gcell/src/lib/Makefile.am index 2ccedc332..e7b349331 100644 --- a/gcell/src/lib/Makefile.am +++ b/gcell/src/lib/Makefile.am @@ -20,7 +20,7 @@ include $(top_srcdir)/Makefile.common -SUBDIRS = spu runtime general procs . +SUBDIRS = spu runtime general wrapper . # generate libgcell.la from the convenience libraries in subdirs @@ -34,11 +34,13 @@ libgcell_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libgcell_la_LIBADD = \ runtime/libruntime.la \ + wrapper/libwrapper.la \ -lspe2 \ $(OMNITHREAD_LA) libgcell_qa_la_LIBADD = \ runtime/libruntime-qa.la \ + wrapper/libwrapper-qa.la \ $(CPPUNIT_LIBS) diff --git a/gcell/src/lib/general/Makefile.am b/gcell/src/lib/general/Makefile.am index 0e32ffc37..bd5a4de62 100644 --- a/gcell/src/lib/general/Makefile.am +++ b/gcell/src/lib/general/Makefile.am @@ -20,5 +20,4 @@ include $(top_srcdir)/Makefile.common -# SUBDIRS = spu . diff --git a/gcell/src/lib/general/spu/fft_1d.h b/gcell/src/lib/general/spu/fft_1d.h new file mode 100644 index 000000000..355b84bf1 --- /dev/null +++ b/gcell/src/lib/general/spu/fft_1d.h @@ -0,0 +1,103 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _FFT_1D_H_ +#define _FFT_1D_H_ 1 + +#include <spu_intrinsics.h> + +/* BIT_SWAP - swaps up to 16 bits of the integer _i according to the + * pattern specified by _pat. + */ +#define BIT_SWAP(_i, _pat) spu_extract(spu_gather(spu_shuffle(spu_maskb(_i), _pat, _pat)), 0) + + +#ifndef MAX_FFT_1D_SIZE +#define MAX_FFT_1D_SIZE 8192 +#endif + +#ifndef INV_SQRT_2 +#define INV_SQRT_2 0.7071067811865 +#endif + + +/* The following macro, FFT_1D_BUTTERFLY, performs a 4 way SIMD basic butterfly + * operation. The inputs are in parallel arrays (seperate real and imaginary + * vectors). + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + */ + +#define FFT_1D_BUTTERFLY(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \ + vector float _qw_re, _qw_im; \ + \ + _qw_re = spu_msub(_q_re, _W_re, spu_mul(_q_im, _W_im)); \ + _qw_im = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \ + _P_re = spu_add(_p_re, _qw_re); \ + _P_im = spu_add(_p_im, _qw_im); \ + _Q_re = spu_sub(_p_re, _qw_re); \ + _Q_im = spu_sub(_p_im, _qw_im); \ +} + + +/* FFT_1D_BUTTERFLY_HI is equivalent to FFT_1D_BUTTERFLY with twiddle factors (W_im, -W_re) + */ +#define FFT_1D_BUTTERFLY_HI(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \ + vector float _qw_re, _qw_im; \ + \ + _qw_re = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \ + _qw_im = spu_msub(_q_im, _W_im, spu_mul(_q_re, _W_re)); \ + _P_re = spu_add(_p_re, _qw_re); \ + _P_im = spu_add(_p_im, _qw_im); \ + _Q_re = spu_sub(_p_re, _qw_re); \ + _Q_im = spu_sub(_p_im, _qw_im); \ +} + +#endif /* _FFT_1D_H_ */ diff --git a/gcell/src/lib/general/spu/fft_1d_r2.c b/gcell/src/lib/general/spu/fft_1d_r2.c new file mode 100644 index 000000000..a0660b307 --- /dev/null +++ b/gcell/src/lib/general/spu/fft_1d_r2.c @@ -0,0 +1,35 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <libfft.h> +#include <fft_1d_r2.h> +#include <assert.h> + +/* + * invoke the inline version + */ +void +fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size) +{ + assert((1 << log2_size) <= MAX_FFT_1D_SIZE); + + _fft_1d_r2(out, in, W, log2_size); +} diff --git a/gcell/src/lib/general/spu/fft_1d_r2.h b/gcell/src/lib/general/spu/fft_1d_r2.h new file mode 100644 index 000000000..a51cbc341 --- /dev/null +++ b/gcell/src/lib/general/spu/fft_1d_r2.h @@ -0,0 +1,529 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _FFT_1D_R2_H_ +#define _FFT_1D_R2_H_ 1 + +#include "fft_1d.h" + +/* fft_1d_r2 + * --------- + * Performs a single precision, complex Fast Fourier Transform using + * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. + * The input <in> is an array of complex numbers of length (1<<log2_size) + * entries. The result is returned in the array of complex numbers specified + * by <out>. Note: This routine can support an in-place transformation + * by specifying <in> and <out> to be the same array. + * + * This implementation utilizes the Cooley-Tukey algorithm consisting + * of <log2_size> stages. The basic operation is the butterfly. + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + * + * This routine also requires pre-computed twiddle values, W. W is an + * array of single precision complex numbers of length 1<<(log2_size-2) + * and is computed as follows: + * + * for (i=0; i<n/4; i++) + * W[i].real = cos(i * 2*PI/n); + * W[i].imag = -sin(i * 2*PI/n); + * } + * + * This array actually only contains the first half of the twiddle + * factors. Due for symmetry, the second half of the twiddle factors + * are implied and equal: + * + * for (i=0; i<n/4; i++) + * W[i+n/4].real = W[i].imag = sin(i * 2*PI/n); + * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n); + * } + * + * Further symmetry allows one to generate the twiddle factor table + * using half the number of trig computations as follows: + * + * W[0].real = 1.0; + * W[0].imag = 0.0; + * for (i=1; i<n/4; i++) + * W[i].real = cos(i * 2*PI/n); + * W[n/4 - i].imag = -W[i].real; + * } + * + * The complex numbers are packed into quadwords as follows: + * + * quadword complex + * array element array elements + * ----------------------------------------------------- + * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 | + * ----------------------------------------------------- + * + */ + + +static __inline void _fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size) +{ + int i, j, k; + int stage, offset; + int i_rev; + int n, n_2, n_4, n_8, n_16, n_3_16; + int w_stride, w_2stride, w_3stride, w_4stride; + int stride, stride_2, stride_4, stride_3_4; + vector float *W0, *W1, *W2, *W3; + vector float *re0, *re1, *re2, *re3; + vector float *im0, *im1, *im2, *im3; + vector float *in0, *in1, *in2, *in3, *in4, *in5, *in6, *in7; + vector float *out0, *out1, *out2, *out3; + vector float tmp0, tmp1; + vector float w0_re, w0_im, w1_re, w1_im; + vector float w0, w1, w2, w3; + vector float src_lo0, src_lo1, src_lo2, src_lo3; + vector float src_hi0, src_hi1, src_hi2, src_hi3; + vector float dst_lo0, dst_lo1, dst_lo2, dst_lo3; + vector float dst_hi0, dst_hi1, dst_hi2, dst_hi3; + vector float out_re_lo0, out_re_lo1, out_re_lo2, out_re_lo3; + vector float out_im_lo0, out_im_lo1, out_im_lo2, out_im_lo3; + vector float out_re_hi0, out_re_hi1, out_re_hi2, out_re_hi3; + vector float out_im_hi0, out_im_hi1, out_im_hi2, out_im_hi3; + vector float re_lo0, re_lo1, re_lo2, re_lo3; + vector float im_lo0, im_lo1, im_lo2, im_lo3; + vector float re_hi0, re_hi1, re_hi2, re_hi3; + vector float im_hi0, im_hi1, im_hi2, im_hi3; + vector float pq_lo0, pq_lo1, pq_lo2, pq_lo3; + vector float pq_hi0, pq_hi1, pq_hi2, pq_hi3; + vector float re[MAX_FFT_1D_SIZE/4], im[MAX_FFT_1D_SIZE/4]; /* real & imaginary working arrays */ + vector float ppmm = (vector float) { 1.0f, 1.0f, -1.0f, -1.0f}; + vector float pmmp = (vector float) { 1.0f, -1.0f, -1.0f, 1.0f}; + vector unsigned char reverse; + vector unsigned char shuf_lo = (vector unsigned char) { + 0, 1, 2, 3, 4, 5, 6, 7, + 16,17,18,19, 20,21,22,23}; + vector unsigned char shuf_hi = (vector unsigned char) { + 8, 9,10,11, 12,13,14,15, + 24,25,26,27, 28,29,30,31}; + vector unsigned char shuf_0202 = (vector unsigned char) { + 0, 1, 2, 3, 8, 9,10,11, + 0, 1, 2, 3, 8, 9,10,11}; + vector unsigned char shuf_1313 = (vector unsigned char) { + 4, 5, 6, 7, 12,13,14,15, + 4, 5, 6, 7, 12,13,14,15}; + vector unsigned char shuf_0303 = (vector unsigned char) { + 0, 1, 2, 3, 12,13,14,15, + 0, 1, 2, 3, 12,13,14,15}; + vector unsigned char shuf_1212 = (vector unsigned char) { + 4, 5, 6, 7, 8, 9,10,11, + 4, 5, 6, 7, 8, 9,10,11}; + vector unsigned char shuf_0415 = (vector unsigned char) { + 0, 1, 2, 3, 16,17,18,19, + 4, 5, 6, 7, 20,21,22,23}; + vector unsigned char shuf_2637 = (vector unsigned char) { + 8, 9,10,11, 24,25,26,27, + 12,13,14,15,28,29,30,31}; + vector unsigned char shuf_0246 = (vector unsigned char) { + 0, 1, 2, 3, 8, 9,10,11, + 16,17,18,19,24,25,26,27}; + vector unsigned char shuf_1357 = (vector unsigned char) { + 4, 5, 6, 7, 12,13,14,15, + 20,21,22,23,28,29,30,31}; + + n = 1 << log2_size; + n_2 = n >> 1; + n_4 = n >> 2; + n_8 = n >> 3; + n_16 = n >> 4; + + n_3_16 = n_8 + n_16; + + /* Compute a byte reverse shuffle pattern to be used to produce + * an address bit swap. + */ + reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size), + spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8, + 7, 6, 5, 4, 3, 2,1,0}), + log2_size-16)); + + /* Perform the first 3 stages of the FFT. These stages differs from + * other stages in that the inputs are unscrambled and the data is + * reformated into parallel arrays (ie, seperate real and imaginary + * arrays). The term "unscramble" means the bit address reverse the + * data array. In addition, the first three stages have simple twiddle + * weighting factors. + * stage 1: (1, 0) + * stage 2: (1, 0) and (0, -1) + * stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707) + * + * The arrays are processed as two halves, simultaneously. The lo (first + * half) and hi (second half). This is done because the scramble + * shares source value between each half of the output arrays. + */ + i = 0; + i_rev = 0; + + in0 = in; + in1 = in + n_8; + in2 = in + n_16; + in3 = in + n_3_16; + + in4 = in + n_4; + in5 = in1 + n_4; + in6 = in2 + n_4; + in7 = in3 + n_4; + + re0 = re; + re1 = re + n_8; + im0 = im; + im1 = im + n_8; + + w0_re = (vector float) { 1.0f, INV_SQRT_2, 0.0f, -INV_SQRT_2}; + w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2}; + + do { + src_lo0 = in0[i_rev]; + src_lo1 = in1[i_rev]; + src_lo2 = in2[i_rev]; + src_lo3 = in3[i_rev]; + + src_hi0 = in4[i_rev]; + src_hi1 = in5[i_rev]; + src_hi2 = in6[i_rev]; + src_hi3 = in7[i_rev]; + + /* Perform scramble. + */ + dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo); + dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi); + dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo); + dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi); + dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo); + dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi); + dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo); + dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi); + + /* Perform the stage 1 butterfly. The multiplier constant, ppmm, + * is used to control the sign of the operands since a single + * quadword contains both of P and Q valule of the butterfly. + */ + pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8)); + pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8)); + pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8)); + pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8)); + pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8)); + pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8)); + pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8)); + pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8)); + + /* Perfrom the stage 2 butterfly. For this stage, the + * inputs pq are still interleaved (p.real, p.imag, q.real, + * q.imag), so we must first re-order the data into + * parallel arrays as well as perform the reorder + * associated with the twiddle W[n/4], which equals + * (0, -1). + * + * ie. (A, B) * (0, -1) => (B, -A) + */ + re_lo0 = spu_madd(ppmm, + spu_shuffle(pq_lo1, pq_lo1, shuf_0303), + spu_shuffle(pq_lo0, pq_lo0, shuf_0202)); + im_lo0 = spu_madd(pmmp, + spu_shuffle(pq_lo1, pq_lo1, shuf_1212), + spu_shuffle(pq_lo0, pq_lo0, shuf_1313)); + + re_lo1 = spu_madd(ppmm, + spu_shuffle(pq_lo3, pq_lo3, shuf_0303), + spu_shuffle(pq_lo2, pq_lo2, shuf_0202)); + im_lo1 = spu_madd(pmmp, + spu_shuffle(pq_lo3, pq_lo3, shuf_1212), + spu_shuffle(pq_lo2, pq_lo2, shuf_1313)); + + + re_hi0 = spu_madd(ppmm, + spu_shuffle(pq_hi1, pq_hi1, shuf_0303), + spu_shuffle(pq_hi0, pq_hi0, shuf_0202)); + im_hi0 = spu_madd(pmmp, + spu_shuffle(pq_hi1, pq_hi1, shuf_1212), + spu_shuffle(pq_hi0, pq_hi0, shuf_1313)); + + re_hi1 = spu_madd(ppmm, + spu_shuffle(pq_hi3, pq_hi3, shuf_0303), + spu_shuffle(pq_hi2, pq_hi2, shuf_0202)); + im_hi1 = spu_madd(pmmp, + spu_shuffle(pq_hi3, pq_hi3, shuf_1212), + spu_shuffle(pq_hi2, pq_hi2, shuf_1313)); + + + /* Perform stage 3 butterfly. + */ + FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + re0 += 2; + re1 += 2; + im0 += 2; + im1 += 2; + + i += 8; + i_rev = BIT_SWAP(i, reverse) / 2; + } while (i < n_2); + + /* Process stages 4 to log2_size-2 + */ + for (stage=4, stride=4; stage<log2_size-1; stage++, stride += stride) { + w_stride = n_2 >> stage; + w_2stride = n >> stage; + w_3stride = w_stride + w_2stride; + w_4stride = w_2stride + w_2stride; + + W0 = W; + W1 = W + w_stride; + W2 = W + w_2stride; + W3 = W + w_3stride; + + stride_2 = stride >> 1; + stride_4 = stride >> 2; + stride_3_4 = stride_2 + stride_4; + + re0 = re; im0 = im; + re1 = re + stride_2; im1 = im + stride_2; + re2 = re + stride_4; im2 = im + stride_4; + re3 = re + stride_3_4; im3 = im + stride_3_4; + + for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) { + /* Compute the twiddle factors + */ + w0 = W0[offset]; + w1 = W1[offset]; + w2 = W2[offset]; + w3 = W3[offset]; + + tmp0 = spu_shuffle(w0, w2, shuf_0415); + tmp1 = spu_shuffle(w1, w3, shuf_0415); + + w0_re = spu_shuffle(tmp0, tmp1, shuf_0415); + w0_im = spu_shuffle(tmp0, tmp1, shuf_2637); + + j = i; + k = i + stride; + do { + re_lo0 = re0[j]; im_lo0 = im0[j]; + re_lo1 = re1[j]; im_lo1 = im1[j]; + + re_hi0 = re2[j]; im_hi0 = im2[j]; + re_hi1 = re3[j]; im_hi1 = im3[j]; + + re_lo2 = re0[k]; im_lo2 = im0[k]; + re_lo3 = re1[k]; im_lo3 = im1[k]; + + re_hi2 = re2[k]; im_hi2 = im2[k]; + re_hi3 = re3[k]; im_hi3 = im3[k]; + + FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im); + + j += 2 * stride; + k += 2 * stride; + } while (j < n_4); + } + } + + /* Process stage log2_size-1. This is identical to the stage processing above + * except for this stage the inner loop is only executed once so it is removed + * entirely. + */ + w_stride = n_2 >> stage; + w_2stride = n >> stage; + w_3stride = w_stride + w_2stride; + w_4stride = w_2stride + w_2stride; + + stride_2 = stride >> 1; + stride_4 = stride >> 2; + + stride_3_4 = stride_2 + stride_4; + + re0 = re; im0 = im; + re1 = re + stride_2; im1 = im + stride_2; + re2 = re + stride_4; im2 = im + stride_4; + re3 = re + stride_3_4; im3 = im + stride_3_4; + + for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) { + /* Compute the twiddle factors + */ + w0 = W[offset]; + w1 = W[offset + w_stride]; + w2 = W[offset + w_2stride]; + w3 = W[offset + w_3stride]; + + tmp0 = spu_shuffle(w0, w2, shuf_0415); + tmp1 = spu_shuffle(w1, w3, shuf_0415); + + w0_re = spu_shuffle(tmp0, tmp1, shuf_0415); + w0_im = spu_shuffle(tmp0, tmp1, shuf_2637); + + j = i; + k = i + stride; + + re_lo0 = re0[j]; im_lo0 = im0[j]; + re_lo1 = re1[j]; im_lo1 = im1[j]; + + re_hi0 = re2[j]; im_hi0 = im2[j]; + re_hi1 = re3[j]; im_hi1 = im3[j]; + + re_lo2 = re0[k]; im_lo2 = im0[k]; + re_lo3 = re1[k]; im_lo3 = im1[k]; + + re_hi2 = re2[k]; im_hi2 = im2[k]; + re_hi3 = re3[k]; im_hi3 = im3[k]; + + FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im); + } + + + /* Process the final stage (stage log2_size). For this stage, + * reformat the data from parallel arrays back into + * interleaved arrays,storing the result into <in>. + * + * This loop has been manually unrolled by 2 to improve + * dual issue rates and reduce stalls. This unrolling + * forces a minimum FFT size of 32. + */ + re0 = re; + re1 = re + n_8; + re2 = re + n_16; + re3 = re + n_3_16; + + im0 = im; + im1 = im + n_8; + im2 = im + n_16; + im3 = im + n_3_16; + + out0 = out; + out1 = out + n_4; + out2 = out + n_8; + out3 = out1 + n_8; + + i = n_16; + + do { + /* Fetch the twiddle factors + */ + w0 = W[0]; + w1 = W[1]; + w2 = W[2]; + w3 = W[3]; + + W += 4; + + w0_re = spu_shuffle(w0, w1, shuf_0246); + w0_im = spu_shuffle(w0, w1, shuf_1357); + w1_re = spu_shuffle(w2, w3, shuf_0246); + w1_im = spu_shuffle(w2, w3, shuf_1357); + + /* Fetch the butterfly inputs, reals and imaginaries + */ + re_lo0 = re0[0]; im_lo0 = im0[0]; + re_lo1 = re1[0]; im_lo1 = im1[0]; + re_lo2 = re0[1]; im_lo2 = im0[1]; + re_lo3 = re1[1]; im_lo3 = im1[1]; + + re_hi0 = re2[0]; im_hi0 = im2[0]; + re_hi1 = re3[0]; im_hi1 = im3[0]; + re_hi2 = re2[1]; im_hi2 = im2[1]; + re_hi3 = re3[1]; im_hi3 = im3[1]; + + re0 += 2; im0 += 2; + re1 += 2; im1 += 2; + re2 += 2; im2 += 2; + re3 += 2; im3 += 2; + + /* Perform the butterflys + */ + FFT_1D_BUTTERFLY (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im); + + FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im); + + /* Interleave the results and store them into the output buffers (ie, + * the original input buffers. + */ + out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415); + out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637); + out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415); + out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637); + + out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415); + out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637); + out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415); + out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637); + + out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415); + out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637); + out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415); + out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637); + + out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415); + out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637); + out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415); + out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637); + + out0 += 4; + out1 += 4; + out2 += 4; + out3 += 4; + + i -= 2; + } while (i); +} + +#endif /* _FFT_1D_R2_H_ */ diff --git a/gcell/src/lib/general/spu/libfft.h b/gcell/src/lib/general/spu/libfft.h new file mode 100644 index 000000000..dd387be0c --- /dev/null +++ b/gcell/src/lib/general/spu/libfft.h @@ -0,0 +1,113 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2008 Free Software Foundation, Inc. */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ + +#ifndef INCLUDED_LIBFFT_H +#define INCLUDED_LIBFFT_H + +// must be defined before inclusion of fft_1d_r2.h +#define MAX_FFT_1D_SIZE 4096 + +/* fft_1d_r2 + * --------- + * Performs a single precision, complex Fast Fourier Transform using + * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. + * The input <in> is an array of complex numbers of length (1<<log2_size) + * entries. The result is returned in the array of complex numbers specified + * by <out>. Note: This routine can support an in-place transformation + * by specifying <in> and <out> to be the same array. + * + * This implementation utilizes the Cooley-Tukey algorithm consisting + * of <log2_size> stages. The basic operation is the butterfly. + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + * + * This routine also requires pre-computed twiddle values, W. W is an + * array of single precision complex numbers of length 1<<(log2_size-2) + * and is computed as follows: + * + * for (i=0; i<n/4; i++) + * W[i].real = cos(i * 2*PI/n); + * W[i].imag = -sin(i * 2*PI/n); + * } + * + * This array actually only contains the first half of the twiddle + * factors. Due for symmetry, the second half of the twiddle factors + * are implied and equal: + * + * for (i=0; i<n/4; i++) + * W[i+n/4].real = W[i].imag = sin(i * 2*PI/n); + * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n); + * } + * + * Further symmetry allows one to generate the twiddle factor table + * using half the number of trig computations as follows: + * + * W[0].real = 1.0; + * W[0].imag = 0.0; + * for (i=1; i<n/4; i++) + * W[i].real = cos(i * 2*PI/n); + * W[n/4 - i].imag = -W[i].real; + * } + * + * The complex numbers are packed into quadwords as follows: + * + * quadword complex + * array element array elements + * ----------------------------------------------------- + * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 | + * ----------------------------------------------------- + * + */ + +void fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size); + +#endif /* INCLUDED_LIBFFT_H */ diff --git a/gcell/src/lib/runtime/Makefile.am b/gcell/src/lib/runtime/Makefile.am index 3f2077c08..a68d2bcd0 100644 --- a/gcell/src/lib/runtime/Makefile.am +++ b/gcell/src/lib/runtime/Makefile.am @@ -20,8 +20,6 @@ include $(top_srcdir)/Makefile.common -# SUBDIRS = spu . - IBM_PPU_SYNC_INCLUDES = -I$(top_srcdir)/gcell/src/ibm/sync/ppu_source @@ -29,6 +27,8 @@ AM_CPPFLAGS = $(DEFINES) $(OMNITHREAD_INCLUDES) $(MBLOCK_INCLUDES) $(CPPUNIT_INC $(GCELL_INCLUDES) $(IBM_PPU_SYNC_INCLUDES) $(WITH_INCLUDES) +dist_bin_SCRIPTS = gcell-embedspu-libtool + noinst_LTLIBRARIES = libruntime.la libruntime-qa.la libruntime_la_SOURCES = \ @@ -39,7 +39,7 @@ libruntime_la_SOURCES = \ gc_proc_def_utils.cc libruntime_qa_la_SOURCES = \ - qa_lib.cc \ + qa_gcell_runtime.cc \ qa_jd_queue.cc \ qa_jd_stack.cc \ qa_job_manager.cc @@ -55,22 +55,14 @@ noinst_HEADERS = \ qa_jd_queue.h \ qa_jd_stack.h \ qa_job_manager.h \ - qa_lib.h + qa_gcell_runtime.h - -# This kruft is required to link the QA SPU executable into the PPE shared lib w/o warnings -gcell_qa.lo: ../spu/gcell_qa - ppu-embedspu -m32 -fpic gcell_qa ../spu/gcell_qa .libs/gcell_qa.o - @rm -f gcell_qa.lo - @echo "# gcell_qa.lo - a libtool object file" >> gcell_qa.lo - @echo "# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06)" >> gcell_qa.lo - @echo "#" >> gcell_qa.lo - @echo "# Please DO NOT delete this file!" >> gcell_qa.lo - @echo "# It is necessary for linking the library." >> gcell_qa.lo - @echo "" >> gcell_qa.lo - @echo "pic_object='.libs/gcell_qa.o'" >> gcell_qa.lo - @echo "non_pic_object=none" >> gcell_qa.lo +# generate a libtool.lo that contains an embeded SPU executable +gcell_runtime_qa.lo: ../spu/gcell_runtime_qa + $(GCELL_EMBEDSPU_LIBTOOL) $@ $< libruntime_qa_la_LIBADD = \ - gcell_qa.lo \ + gcell_runtime_qa.lo \ libruntime.la + +CLEANFILES = gcell_runtime_qa.lo diff --git a/gcell/src/lib/runtime/gc_job_manager.cc b/gcell/src/lib/runtime/gc_job_manager.cc index 94090bedf..9ede5e156 100644 --- a/gcell/src/lib/runtime/gc_job_manager.cc +++ b/gcell/src/lib/runtime/gc_job_manager.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2007 Free Software Foundation, Inc. + * Copyright 2007,2008 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -24,11 +24,17 @@ #endif #include "gc_job_manager.h" #include "gc_job_manager_impl.h" +#include <boost/weak_ptr.hpp> +#include <stdio.h> -gc_job_manager * + +static boost::weak_ptr<gc_job_manager> s_singleton; + + +gc_job_manager_sptr gc_make_job_manager(const gc_jm_options *options) { - return new gc_job_manager_impl(options); + return gc_job_manager_sptr(new gc_job_manager_impl(options)); } gc_job_manager::gc_job_manager(const gc_jm_options *options) @@ -52,3 +58,103 @@ gc_job_manager::debug() { return 0; } + +void +gc_job_manager::set_singleton(gc_job_manager_sptr mgr) +{ + s_singleton = mgr; +} + +gc_job_manager_sptr +gc_job_manager::singleton() +{ + return gc_job_manager_sptr(s_singleton); +} + +// ------------------------------------------------------------------------ + + +// custom deleter +class spe_program_handle_deleter { +public: + void operator()(spe_program_handle_t *program) { + if (program){ + int r = spe_image_close(program); + if (r != 0){ + perror("spe_image_close"); + } + } + } +}; + +// nop custom deleter +class nop_spe_program_handle_deleter { +public: + void operator()(spe_program_handle_t *program) { + } +}; + +spe_program_handle_sptr +gc_program_handle_from_filename(const std::string &filename) +{ + return spe_program_handle_sptr(spe_image_open(filename.c_str()), + spe_program_handle_deleter()); +} + + +spe_program_handle_sptr +gc_program_handle_from_address(spe_program_handle_t *handle) +{ + return spe_program_handle_sptr(handle, nop_spe_program_handle_deleter()); +} + +const std::string +gc_job_status_string(gc_job_status_t status) +{ + switch(status){ + case JS_OK: return "JS_OK"; + case JS_SHUTTING_DOWN: return "JS_SHUTTING_DOWN"; + case JS_TOO_MANY_CLIENTS: return "JS_TOO_MANY_CLIENTS"; + case JS_UNKNOWN_PROC: return "JS_UNKNOWN_PROC"; + case JS_BAD_DIRECTION: return "JS_BAD_DIRECTION"; + case JS_BAD_EAH: return "JS_BAD_EAH"; + case JS_BAD_N_DIRECT: return "JS_BAD_N_DIRECT"; + case JS_BAD_N_EA: return "JS_BAD_N_EA"; + case JS_ARGS_TOO_LONG: return "JS_ARGS_TOO_LONG"; + case JS_BAD_JUJU: return "JS_BAD_JUJU"; + case JS_BAD_JOB_DESC: return "JS_BAD_JOB_DESC"; + default: + char buf[100]; + snprintf(buf, sizeof(buf), "unknown gc_job_status_t (%d)\n", status); + return buf; + } +} + +/* + * exception classes + */ + +gc_exception::gc_exception(const std::string &msg) + : runtime_error(msg) +{ +} + +gc_unknown_proc::gc_unknown_proc(const std::string &msg) + : gc_exception("gc_unknown_proc: " + msg) +{ +} + +gc_bad_alloc::gc_bad_alloc(const std::string &msg) + : gc_exception("gc_bad_alloc: " + msg) +{ +} + +gc_bad_align::gc_bad_align(const std::string &msg) + : gc_exception("gc_bad_align: " + msg) +{ +} + +gc_bad_submit::gc_bad_submit(const std::string &name, gc_job_status_t status) + : gc_exception("gc_bad_submit(" + name + "): " + gc_job_status_string(status)) +{ +} diff --git a/gcell/src/lib/runtime/gc_job_manager.h b/gcell/src/lib/runtime/gc_job_manager.h index 9c8e70bf8..aa30dc24b 100644 --- a/gcell/src/lib/runtime/gc_job_manager.h +++ b/gcell/src/lib/runtime/gc_job_manager.h @@ -23,17 +23,52 @@ #define INCLUDED_GC_JOB_MANAGER_H #include <boost/utility.hpp> +#include <boost/shared_ptr.hpp> #include <vector> #include <string> +#include <stdexcept> #include <libspe2.h> #include "gc_job_desc.h" class gc_job_manager; +typedef boost::shared_ptr<gc_job_manager> gc_job_manager_sptr; +typedef boost::shared_ptr<spe_program_handle_t> spe_program_handle_sptr; -enum gc_wait_mode { - GC_WAIT_ANY, - GC_WAIT_ALL, -}; +/*! + * \brief Return a boost::shared_ptr to an spe_program_handle_t + * + * \param filename is the name of the SPE ELF executable to open. + * + * Calls spe_image_open to open the file. If successful returns a + * boost::shared_ptr that will call spe_image_close when it's time to + * free the object. + * + * Returns the equivalent of the NULL pointer if the file cannot be + * opened, or if it's not an SPE ELF object file. + * + * \sa gc_program_handle_from_address + */ +spe_program_handle_sptr +gc_program_handle_from_filename(const std::string &filename); + +/*! + * \brief Return a boost::shared_ptr to an spe_program_handle_t + * + * \param handle is a non-zero pointer to an embedded SPE image. + * + * If successful returns a boost::shared_ptr that does nothing when + * it's time to free the object. + * + * \sa gc_program_handle_from_filename + */ +spe_program_handle_sptr +gc_program_handle_from_address(spe_program_handle_t *handle); + +/*! + * \brief map gc_job_status_t into a string + */ +const std::string +gc_job_status_string(gc_job_status_t status); /* * \brief Options that configure the job_manager. @@ -46,23 +81,59 @@ struct gc_jm_options { bool gang_schedule; // shall we gang schedule? bool use_affinity; // shall we try for affinity (FIXME not implmented) bool enable_logging; // shall we log SPE events? - uint32_t log2_nlog_entries; // log2 of number of log entries (default is 12 == 4k) - spe_program_handle_t *program_handle; // program to load into SPEs + uint32_t log2_nlog_entries; // log2 of number of log entries (default is 12 == 4k) + spe_program_handle_sptr program_handle; // program to load into SPEs gc_jm_options() : max_jobs(0), max_client_threads(0), nspes(0), gang_schedule(true), use_affinity(false), - enable_logging(false), log2_nlog_entries(12), - program_handle(0) + enable_logging(false), log2_nlog_entries(12) { } }; +enum gc_wait_mode { + GC_WAIT_ANY, + GC_WAIT_ALL, +}; + +/* + * exception classes + */ +class gc_exception : public std::runtime_error +{ +public: + gc_exception(const std::string &msg); +}; + +class gc_unknown_proc : public gc_exception +{ +public: + gc_unknown_proc(const std::string &msg); +}; + +class gc_bad_alloc : public gc_exception +{ +public: + gc_bad_alloc(const std::string &msg); +}; + +class gc_bad_align : public gc_exception +{ +public: + gc_bad_align(const std::string &msg); +}; + +class gc_bad_submit : public gc_exception +{ +public: + gc_bad_submit(const std::string &name, gc_job_status_t status); +}; /* * \brief Create an instance of the job manager */ -gc_job_manager * +gc_job_manager_sptr gc_make_job_manager(const gc_jm_options *options = 0); @@ -92,7 +163,7 @@ public: /*! * \brief Return a pointer to a properly aligned job descriptor, - * or zero if none are available. + * or throws gc_bad_alloc if there are none available. */ virtual gc_job_desc *alloc_job_desc() = 0; @@ -139,6 +210,8 @@ public: * A thread may only wait for jobs which it submitted. * * \returns number of jobs completed, or -1 if error. + * The caller must examine the status field of each job to confirm + * successful completion of the job. */ virtual int wait_jobs(unsigned int njobs, @@ -154,7 +227,7 @@ public: /*! * Return gc_proc_id_t associated with spu procedure \p proc_name if one - * exists, otherwise return GCP_UNKNOWN_PROC. + * exists, otherwise throws gc_unknown_proc. */ virtual gc_proc_id_t lookup_proc(const std::string &proc_name) = 0; @@ -163,6 +236,27 @@ public: */ virtual std::vector<std::string> proc_names() = 0; + /*! + * \brief Set the singleton gc_job_manager instance. + * \param mgr is the job manager instance. + * + * The singleton is weakly held, thus the caller must maintain + * a reference to the mgr for the duration. (If we held the + * manager strongly, the destructor would never be called, and the + * resources (SPEs) would not be returned.) Bottom line: the + * caller is responsible for life-time management. + */ + static void set_singleton(gc_job_manager_sptr mgr); + + /*! + * \brief Retrieve the singleton gc_job_manager instance. + * + * Returns the singleton gc_job_manager instance or raises + * boost::bad_weak_ptr if the singleton is empty. + */ + static gc_job_manager_sptr singleton(); + + virtual void set_debug(int debug); virtual int debug(); }; diff --git a/gcell/src/lib/runtime/gc_job_manager_impl.cc b/gcell/src/lib/runtime/gc_job_manager_impl.cc index dd08154d0..59deb4ae5 100644 --- a/gcell/src/lib/runtime/gc_job_manager_impl.cc +++ b/gcell/src/lib/runtime/gc_job_manager_impl.cc @@ -65,19 +65,6 @@ public: } }; -// custom deleter -class spe_program_handle_deleter { -public: - void operator()(spe_program_handle_t *program) { - if (program){ - int r = spe_image_close(program); - if (r != 0){ - perror("spe_image_close"); - } - } - } -}; - // custom deleter of anything that can be freed with "free" class free_deleter { @@ -150,7 +137,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options) if (d_options.max_client_threads == 0) d_options.max_client_threads = DEFAULT_MAX_CLIENT_THREADS; - if (d_options.program_handle == 0){ + if (!d_options.program_handle){ fprintf(stderr, "gc_job_manager: options->program_handle must be non-zero\n"); throw std::runtime_error("gc_job_manager: options->program_handle must be non-zero"); } @@ -236,7 +223,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options) // get a handle to the spe program - spe_program_handle_t *spe_image = d_options.program_handle; + spe_program_handle_t *spe_image = d_options.program_handle.get(); // fish proc_def table out of SPE ELF file @@ -431,8 +418,12 @@ gc_job_manager_impl::bv_isclr(unsigned long *bv, unsigned int bitno) gc_job_desc * gc_job_manager_impl::alloc_job_desc() { - // stack is lock free, thus safe to call from any thread - return gc_jd_stack_pop(d_free_list); + // stack is lock free, and safe to call from any thread + gc_job_desc *jd = gc_jd_stack_pop(d_free_list); + if (jd == 0) + throw gc_bad_alloc("alloc_job_desc: none available"); + + return jd; } void @@ -557,7 +548,7 @@ bool gc_job_manager_impl::wait_job(gc_job_desc *jd) { bool done; - return wait_jobs(1, &jd, &done, GC_WAIT_ANY) == 1; + return wait_jobs(1, &jd, &done, GC_WAIT_ANY) == 1 && jd->status == JS_OK; } int @@ -1246,7 +1237,7 @@ gc_job_manager_impl::lookup_proc(const std::string &proc_name) if (proc_name == d_proc_def[i].name) return i; - return GCP_UNKNOWN_PROC; + throw gc_unknown_proc(proc_name); } std::vector<std::string> diff --git a/gcell/src/lib/runtime/gc_job_manager_impl.h b/gcell/src/lib/runtime/gc_job_manager_impl.h index 46897848c..fcc24dc0c 100644 --- a/gcell/src/lib/runtime/gc_job_manager_impl.h +++ b/gcell/src/lib/runtime/gc_job_manager_impl.h @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2007 Free Software Foundation, Inc. + * Copyright 2007,2008 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -29,7 +29,6 @@ #include "gc_spu_args.h" #include <libspe2.h> #include <vector> -#include <boost/shared_ptr.hpp> #include <boost/scoped_array.hpp> typedef boost::shared_ptr<spe_gang_context> spe_gang_context_sptr; @@ -169,7 +168,7 @@ private: void sync_logfiles(); void unmap_logfiles(); - friend gc_job_manager *gc_make_job_manager(const gc_jm_options *options); + friend gc_job_manager_sptr gc_make_job_manager(const gc_jm_options *options); gc_job_manager_impl(const gc_jm_options *options = 0); diff --git a/gcell/src/lib/runtime/gcell-embedspu-libtool b/gcell/src/lib/runtime/gcell-embedspu-libtool new file mode 100755 index 000000000..a4ee53b7e --- /dev/null +++ b/gcell/src/lib/runtime/gcell-embedspu-libtool @@ -0,0 +1,29 @@ +#!/bin/bash + +if [ $# -ne 2 ]; then + echo "usage: gcell-embedspu-libtool file.lo spu_executable_file" 1>&2 + exit 1 +fi + +lo_file=$1 +spu_executable=$2 +symbol_name=${lo_file%%.lo} + +# generate the .o file that wraps the SPU executable +ppu-embedspu -m32 -fpic ${symbol_name} ${spu_executable} .libs/${symbol_name}.o + +# generate the .lo libtool file that points at all the right places +rm -f $lo_file +cat >$lo_file.new <<EOF +# $lo_file - a libtool object file +# Generated by ltmain.sh - GNU libtool 1.5.22 (1.1220.2.365 2005/12/18 22:14:06) +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +pic_object='.libs/${symbol_name}.o' +non_pic_object=none +EOF + +mv $lo_file.new $lo_file + diff --git a/gcell/src/lib/runtime/qa_lib.cc b/gcell/src/lib/runtime/qa_gcell_runtime.cc index d8a8960c6..fef9a7fb4 100644 --- a/gcell/src/lib/runtime/qa_lib.cc +++ b/gcell/src/lib/runtime/qa_gcell_runtime.cc @@ -25,15 +25,15 @@ * add them here. */ -#include <qa_lib.h> +#include <qa_gcell_runtime.h> #include <qa_jd_stack.h> #include <qa_jd_queue.h> #include <qa_job_manager.h> CppUnit::TestSuite * -qa_lib::suite() +qa_gcell_runtime::suite() { - CppUnit::TestSuite *s = new CppUnit::TestSuite("lib"); + CppUnit::TestSuite *s = new CppUnit::TestSuite("runtime"); s->addTest(qa_jd_stack::suite()); s->addTest(qa_jd_queue::suite()); diff --git a/gcell/src/lib/runtime/qa_lib.h b/gcell/src/lib/runtime/qa_gcell_runtime.h index 594efcdc8..36180c919 100644 --- a/gcell/src/lib/runtime/qa_lib.h +++ b/gcell/src/lib/runtime/qa_gcell_runtime.h @@ -18,18 +18,18 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef INCLUDED_QA_LIB_H -#define INCLUDED_QA_LIB_H +#ifndef INCLUDED_QA_GCELL_RUNTIME_H +#define INCLUDED_QA_GCELL_RUNTIME_H #include <cppunit/TestSuite.h> -//! collect all the tests for the lib directory +//! collect all the tests for the runtime directory -class qa_lib { +class qa_gcell_runtime { public: //! return suite of tests static CppUnit::TestSuite *suite(); }; -#endif /* INCLUDED_QA_LIB_H */ +#endif /* INCLUDED_QA_GCELL_RUNTIME_H */ diff --git a/gcell/src/lib/runtime/qa_job_manager.cc b/gcell/src/lib/runtime/qa_job_manager.cc index 3f2780c52..53a1ec681 100644 --- a/gcell/src/lib/runtime/qa_job_manager.cc +++ b/gcell/src/lib/runtime/qa_job_manager.cc @@ -29,7 +29,8 @@ #include <malloc.h> -extern spe_program_handle_t gcell_qa; // handle to embedded SPU executable w/ QA routines +// handle to embedded SPU executable w/ QA routines +extern spe_program_handle_t gcell_runtime_qa; #if 0 static void @@ -173,23 +174,21 @@ qa_job_manager::t15() void qa_job_manager::t1_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); mgr = gc_make_job_manager(&opts); - delete mgr; } void qa_job_manager::t2_body() { - gc_job_manager *mgr = 0; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 100; opts.gang_schedule = false; mgr = gc_make_job_manager(&opts); - delete mgr; } void @@ -200,13 +199,12 @@ qa_job_manager::t3_body() // cppunit. cppunit is the prime suspect. #if 0 - gc_job_manager *mgr = 0; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 100; opts.gang_schedule = true; CPPUNIT_ASSERT_THROW(mgr = gc_make_job_manager(&opts), std::out_of_range); - delete mgr; #endif } @@ -222,9 +220,9 @@ init_jd(gc_job_desc *jd, gc_proc_id_t proc_id) void qa_job_manager::t4_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); //mgr->set_debug(-1); @@ -232,8 +230,8 @@ qa_job_manager::t4_body() gc_job_desc *jds[NJOBS]; bool done[NJOBS]; - gc_proc_id_t gcp_no_such = mgr->lookup_proc("--no-such-proc-name--"); - CPPUNIT_ASSERT_EQUAL(GCP_UNKNOWN_PROC, gcp_no_such); + gc_proc_id_t gcp_no_such; + CPPUNIT_ASSERT_THROW(gcp_no_such = mgr->lookup_proc("--no-such-proc-name--"), gc_unknown_proc); gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop"); CPPUNIT_ASSERT(gcp_qa_nop != GCP_UNKNOWN_PROC); @@ -256,16 +254,14 @@ qa_job_manager::t4_body() for (int i = 0; i < NJOBS; i++){ mgr->free_job_desc(jds[i]); } - - delete mgr; } void qa_job_manager::t5_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 0; // use them all mgr = gc_make_job_manager(&opts); //mgr->set_debug(-1); @@ -293,16 +289,14 @@ qa_job_manager::t5_body() for (int i = 0; i < NJOBS; i++){ mgr->free_job_desc(jds[i]); } - - delete mgr; } void qa_job_manager::t6_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); gc_proc_id_t gcp_qa_nop = mgr->lookup_proc("qa_nop"); @@ -330,7 +324,6 @@ qa_job_manager::t6_body() } mgr->free_job_desc(jd); - delete mgr; } static int @@ -344,7 +337,7 @@ sum_shorts(short *p, int nshorts) } static void -test_sum_shorts(gc_job_manager *mgr, short *buf, int nshorts) +test_sum_shorts(gc_job_manager_sptr mgr, short *buf, int nshorts) { gc_job_desc *jd = mgr->alloc_job_desc(); gc_proc_id_t gcp_qa_sum_shorts = mgr->lookup_proc("qa_sum_shorts"); @@ -379,9 +372,9 @@ static short short_buf[NS] _AL128; // for known alignment void qa_job_manager::t7_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); @@ -400,8 +393,6 @@ qa_job_manager::t7_body() for (int offset = 0; offset <= 64; offset++){ test_sum_shorts(mgr, &short_buf[offset], ea_args_maxsize/sizeof(short)); } - - delete mgr; } // @@ -410,9 +401,9 @@ qa_job_manager::t7_body() void qa_job_manager::t8_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); gc_job_desc *jd = mgr->alloc_job_desc(); @@ -433,7 +424,6 @@ qa_job_manager::t8_body() } mgr->free_job_desc(jd); - delete mgr; } // @@ -444,9 +434,9 @@ qa_job_manager::t9_body() { static const int N = 127; static const int M = 201; - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); gc_job_desc *jd = mgr->alloc_job_desc(); @@ -474,7 +464,6 @@ qa_job_manager::t9_body() } mgr->free_job_desc(jd); - delete mgr; } static bool @@ -510,7 +499,7 @@ confirm_seq(const unsigned char *buf, size_t len, unsigned char v) } static void -test_put_seq(gc_job_manager *mgr, int offset, int len, int starting_val) +test_put_seq(gc_job_manager_sptr mgr, int offset, int len, int starting_val) { gc_job_desc *jd = mgr->alloc_job_desc(); gc_proc_id_t gcp_qa_put_seq = mgr->lookup_proc("qa_put_seq"); @@ -556,9 +545,9 @@ test_put_seq(gc_job_manager *mgr, int offset, int len, int starting_val) void qa_job_manager::t10_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); @@ -576,8 +565,6 @@ qa_job_manager::t10_body() for (int offset = 0; offset <= 64; offset++){ test_put_seq(mgr, offset, ea_args_maxsize, starting_val++); } - - delete mgr; } // @@ -586,9 +573,9 @@ qa_job_manager::t10_body() void qa_job_manager::t11_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); gc_job_desc *jd = mgr->alloc_job_desc(); @@ -611,7 +598,6 @@ qa_job_manager::t11_body() } mgr->free_job_desc(jd); - delete mgr; } // @@ -622,9 +608,9 @@ qa_job_manager::t12_body() { static const int N = 127; static const int M = 201; - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); gc_job_desc *jd = mgr->alloc_job_desc(); @@ -662,7 +648,6 @@ qa_job_manager::t12_body() } mgr->free_job_desc(jd); - delete mgr; } // @@ -671,9 +656,9 @@ qa_job_manager::t12_body() void qa_job_manager::t13_body() { - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); @@ -720,8 +705,6 @@ qa_job_manager::t13_body() CPPUNIT_ASSERT(ok); } mgr->free_job_desc(jd); - - delete mgr; } /* @@ -743,9 +726,9 @@ qa_job_manager::t14_body() memset(buf, 0xff, LEN_PER_JOB * NJOBS); - gc_job_manager *mgr; + gc_job_manager_sptr mgr; gc_jm_options opts; - opts.program_handle = &gcell_qa; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); opts.nspes = 1; mgr = gc_make_job_manager(&opts); @@ -788,11 +771,19 @@ qa_job_manager::t14_body() // cleanup for (int i = 0; i < NJOBS; i++) mgr->free_job_desc(jd[i]); - - delete mgr; } void qa_job_manager::t15_body() { + gc_jm_options opts; + opts.program_handle = gc_program_handle_from_address(&gcell_runtime_qa); + opts.nspes = 1; + gc_job_manager_sptr mgr = gc_make_job_manager(&opts); + + gc_job_manager::set_singleton(mgr); + + CPPUNIT_ASSERT(gc_job_manager::singleton()); + mgr.reset(); + CPPUNIT_ASSERT_THROW(gc_job_manager::singleton(), boost::bad_weak_ptr); } diff --git a/gcell/src/lib/runtime/spu/gc_spu_config.h b/gcell/src/lib/runtime/spu/gc_spu_config.h index 997645e68..6eff71060 100644 --- a/gcell/src/lib/runtime/spu/gc_spu_config.h +++ b/gcell/src/lib/runtime/spu/gc_spu_config.h @@ -24,7 +24,7 @@ #include <gc_job_desc.h> #define CACHE_LINE_SIZE 128 // in bytes -#define GC_SPU_BUFSIZE_BASE (32 * 1024) // must be multiple of CACHE_LINE_SIZE +#define GC_SPU_BUFSIZE_BASE (40 * 1024) // must be multiple of CACHE_LINE_SIZE #define GC_SPU_BUFSIZE (GC_SPU_BUFSIZE_BASE + MAX_ARGS_EA * CACHE_LINE_SIZE) #define NGETBUFS 1 // single buffer job arg gets diff --git a/gcell/src/lib/runtime/spu/gcell_qa.c b/gcell/src/lib/runtime/spu/gcell_runtime_qa.c index 51bf38a6a..51bf38a6a 100644 --- a/gcell/src/lib/runtime/spu/gcell_qa.c +++ b/gcell/src/lib/runtime/spu/gcell_runtime_qa.c diff --git a/gcell/src/lib/spu/Makefile.am b/gcell/src/lib/spu/Makefile.am index fac057cd5..3c96d8f30 100644 --- a/gcell/src/lib/spu/Makefile.am +++ b/gcell/src/lib/spu/Makefile.am @@ -57,23 +57,28 @@ runtime_spu_noinst_headers = \ general_srcdir = $(srcdir)/../general/spu -general_spu_sources = +general_spu_sources = \ + $(general_srcdir)/fft_1d_r2.c -general_spu_headers = +general_spu_headers = \ + $(general_srcdir)/libfft.h -general_spu_noinst_headers = +general_spu_noinst_headers = \ + $(general_srcdir)/fft_1d.h \ + $(general_srcdir)/fft_1d_r2.h # ---------------------------------------------------------------- -# files in the lib/procs/spu directory +# files in the lib/wrapper/spu directory -procs_srcdir = $(srcdir)/../proc/spu +wrapper_srcdir = $(srcdir)/../wrapper/spu -procs_spu_sources = +wrapper_spu_sources = \ + $(wrapper_srcdir)/gcs_fft_1d_r2.c -procs_spu_headers = +wrapper_spu_headers = -procs_spu_noinst_headers = +wrapper_spu_noinst_headers = # ---------------------------------------------------------------- # build the library from the files in the three directories @@ -81,23 +86,29 @@ procs_spu_noinst_headers = libgcell_spu_a_SOURCES = \ $(runtime_spu_sources) \ $(general_spu_sources) \ - $(procs_spu_sources) + $(wrapper_spu_sources) gcellspuinclude_HEADERS = \ $(runtime_spu_headers) \ $(general_spu_headers) \ - $(procs_spu_headers) + $(wrapper_spu_headers) noinst_HEADERS = \ $(runtime_spu_noinst_headers) \ $(general_spu_noinst_headers) \ - $(procs_spu_noinst_headers) + $(wrapper_spu_noinst_headers) # ---------------------------------------------------------------- -# SPU executable containing QA code +# SPU executables noinst_PROGRAMS = \ - gcell_qa + gcell_all \ + gcell_runtime_qa -gcell_qa_SOURCES = $(runtime_srcdir)/gcell_qa.c -gcell_qa_LDADD = libgcell_spu.a +# all known gcell procs (at least until they get too big) +gcell_all_SOURCES = $(wrapper_spu_sources) +gcell_all_LDADD = libgcell_spu.a + +# just the QA code required for testing the runtime +gcell_runtime_qa_SOURCES = $(runtime_srcdir)/gcell_runtime_qa.c +gcell_runtime_qa_LDADD = libgcell_spu.a diff --git a/gcell/src/lib/procs/Makefile.am b/gcell/src/lib/wrapper/Makefile.am index 0e32ffc37..03ffa54b3 100644 --- a/gcell/src/lib/procs/Makefile.am +++ b/gcell/src/lib/wrapper/Makefile.am @@ -20,5 +20,31 @@ include $(top_srcdir)/Makefile.common -# SUBDIRS = spu . +AM_CPPFLAGS = $(DEFINES) $(GCELL_INCLUDES) $(FFTW3F_CFLAGS) $(WITH_INCLUDES) +noinst_LTLIBRARIES = libwrapper.la libwrapper-qa.la + +# generate a libtool.lo that contains an embeded SPU executable +gcell_all.lo: ../spu/gcell_all + $(GCELL_EMBEDSPU_LIBTOOL) $@ $< + +libwrapper_la_SOURCES = \ + gcp_fft_1d_r2.cc + +libwrapper_la_LIBADD = \ + gcell_all.lo + +libwrapper_qa_la_SOURCES = \ + qa_gcell_wrapper.cc \ + qa_gcp_fft_1d_r2.cc + +libwrapper_qa_la_LIBADD = \ + -lfftw3f + +gcellinclude_HEADERS = \ + gcp_fft_1d_r2.h + +noinst_HEADERS = \ + qa_gcell_wrapper.h + +CLEANFILES = gcell_all.lo diff --git a/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc b/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc new file mode 100644 index 000000000..d639dad45 --- /dev/null +++ b/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc @@ -0,0 +1,116 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <gcp_fft_1d_r2.h> +#include <stdint.h> +#include <stdexcept> +#include <math.h> + +static void +init_jd(gc_job_desc *jd, + gc_proc_id_t proc_id, + unsigned log2_fft_length, + bool forward, + std::complex<float> *out, + const std::complex<float> *in, + const std::complex<float> *W) +{ + jd->proc_id = proc_id; + jd->input.nargs = 2; + jd->output.nargs = 0; + jd->eaa.nargs = 3; + + jd->input.arg[0].u32 = log2_fft_length; + jd->input.arg[1].u32 = forward; + unsigned int fft_length = 1 << log2_fft_length; + + jd->eaa.arg[0].ea_addr = ptr_to_ea(out); + jd->eaa.arg[0].direction = GCJD_DMA_PUT; + jd->eaa.arg[0].put_size = sizeof(std::complex<float>) * fft_length; + + jd->eaa.arg[1].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(in)); + jd->eaa.arg[1].direction = GCJD_DMA_GET; + jd->eaa.arg[1].get_size = sizeof(std::complex<float>) * fft_length; + + jd->eaa.arg[2].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(W)); + jd->eaa.arg[2].direction = GCJD_DMA_GET; + jd->eaa.arg[2].get_size = sizeof(std::complex<float>) * fft_length / 4; +} + + +gc_job_desc * +gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr, + unsigned int log2_fft_length, + bool forward, + std::complex<float> *out, + const std::complex<float> *in, + const std::complex<float> *W) +{ + unsigned int fft_length = 1 << log2_fft_length; + if (fft_length > 4096) + throw std::invalid_argument("fft_length > 4096"); + + if ((intptr_t)out & 0xf) + throw gc_bad_align("out"); + if ((intptr_t)in & 0xf) + throw gc_bad_align("in"); + if ((intptr_t)W & 0xf) + throw gc_bad_align("W"); + + gc_proc_id_t fft_id = mgr->lookup_proc("fft_1d_r2"); + gc_job_desc *jd = mgr->alloc_job_desc(); + init_jd(jd, fft_id, log2_fft_length, forward, out, in, W); + if (!mgr->submit_job(jd)){ + gc_job_status_t s = jd->status; + mgr->free_job_desc(jd); + throw gc_bad_submit("fft_1d_r2", s); + } + return jd; +} + +void +gcp_fft_1d_r2_forward_twiddle(unsigned int log2_fft_length, std::complex<float> *W) +{ + unsigned int n = 1 << log2_fft_length; + + W[0].real() = 1.0; + W[0].imag() = 0.0; + for (unsigned i=1; i < n/4; i++){ + W[i].real() = cos(i * 2*M_PI/n); + W[n/4 - i].imag() = -W[i].real(); + } +} + + +void +gcp_fft_1d_r2_reverse_twiddle(unsigned int log2_fft_length, std::complex<float> *W) +{ + // FIXME this is wrong/insufficient. inverse is still incorrect + + // reverse factors are the conjugate of the forward factors + gcp_fft_1d_r2_forward_twiddle(log2_fft_length, W); + + unsigned int n = 1 << log2_fft_length; + for (unsigned i=0; i < n/4; i++) + W[i] = conj(W[i]); +} diff --git a/gcell/src/lib/wrapper/gcp_fft_1d_r2.h b/gcell/src/lib/wrapper/gcp_fft_1d_r2.h new file mode 100644 index 000000000..be1440fd4 --- /dev/null +++ b/gcell/src/lib/wrapper/gcp_fft_1d_r2.h @@ -0,0 +1,66 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCP_FFT_1D_R2_H +#define INCLUDED_GCP_FFT_1D_R2_H + +#include <gc_job_manager.h> +#include <complex> + +/*! + * \brief Submit a job that computes the forward or reverse FFT. + * + * \param mgr is the job manager instance + * \param log2_fft_length is the log2 of the fft_length (4 <= x <= 13). + * \param forward is true to compute the forward xform + * \param out is the fft_length output from FFT (must be 16-byte aligned). + * \param in is the fft_length input to FFT (must be 16-byte aligned). + * \param W is fft_length/4 twiddle factor input to FFT (must be 16-byte aligned). + * + * Returns a job descriptor which should be passed to wait_job*. + * Throws an exception in the event of a problem. + */ +gc_job_desc * +gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr, + unsigned int log2_fft_length, + bool forward, + std::complex<float> *out, + const std::complex<float> *in, + const std::complex<float> *W); + +/*! + * \brief Compute twiddle factors for forward transform. + * + * \param log2_fft_length is the log2 of the fft_length. + * \param W is fft_length/4 twiddle factor output (must be 16-byte aligned). + */ +void +gcp_fft_1d_r2_forward_twiddle(unsigned int log2_fft_length, std::complex<float> *W); + +/*! + * \brief Compute twiddle factors for reverse transform. + * + * \param log2_fft_length is the log2 of the fft_length. + * \param W is fft_length/4 twiddle factor output (must be 16-byte aligned). + */ +void +gcp_fft_1d_r2_reverse_twiddle(unsigned int log2_fft_length, std::complex<float> *W); + +#endif /* INCLUDED_GCP_FFT_1D_R2_H */ diff --git a/gcell/src/lib/wrapper/qa_gcell_wrapper.cc b/gcell/src/lib/wrapper/qa_gcell_wrapper.cc new file mode 100644 index 000000000..029dfbc58 --- /dev/null +++ b/gcell/src/lib/wrapper/qa_gcell_wrapper.cc @@ -0,0 +1,39 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * This class gathers together all the test cases for the lib + * directory into a single test suite. As you create new test cases, + * add them here. + */ + +#include <qa_gcell_wrapper.h> +#include <qa_gcp_fft_1d_r2.h> + +CppUnit::TestSuite * +qa_gcell_wrapper::suite() +{ + CppUnit::TestSuite *s = new CppUnit::TestSuite("wrapper"); + + s->addTest(qa_gcp_fft_1d_r2::suite()); + + return s; +} diff --git a/gcell/src/lib/wrapper/qa_gcell_wrapper.h b/gcell/src/lib/wrapper/qa_gcell_wrapper.h new file mode 100644 index 000000000..cb29db883 --- /dev/null +++ b/gcell/src/lib/wrapper/qa_gcell_wrapper.h @@ -0,0 +1,35 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_QA_GCELL_WRAPPER_H +#define INCLUDED_QA_GCELL_WRAPPER_H + +#include <cppunit/TestSuite.h> + +//! collect all the tests for the wrapper directory + +class qa_gcell_wrapper { +public: + //! return suite of tests + static CppUnit::TestSuite *suite(); +}; + + +#endif /* INCLUDED_QA_GCELL_WRAPPER_H */ diff --git a/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc new file mode 100644 index 000000000..1bb676ac2 --- /dev/null +++ b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc @@ -0,0 +1,211 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "qa_gcp_fft_1d_r2.h" +#include <cppunit/TestAssert.h> +#include <gcp_fft_1d_r2.h> +#include <fftw3.h> +#include <stdio.h> +#include <stdlib.h> // random, posix_memalign +#include <algorithm> + +typedef boost::shared_ptr<void> void_sptr; + +// handle to embedded SPU executable +extern spe_program_handle_t gcell_all; + +/* + * Return pointer to cache-aligned chunk of storage of size size bytes. + * Throw if can't allocate memory. The storage should be freed + * with "free" when done. The memory is initialized to zero. + */ +static void * +aligned_alloc(size_t size, size_t alignment = 128) +{ + void *p = 0; + if (posix_memalign(&p, alignment, size) != 0){ + perror("posix_memalign"); + throw std::runtime_error("memory"); + } + memset(p, 0, size); // zero the memory + return p; +} + +class free_deleter { +public: + void operator()(void *p) { + free(p); + } +}; + +static boost::shared_ptr<void> +aligned_alloc_sptr(size_t size, size_t alignment = 128) +{ + return boost::shared_ptr<void>(aligned_alloc(size, alignment), free_deleter()); +} + +// test forward FFT +void +qa_gcp_fft_1d_r2::t1() +{ + gc_jm_options opts; + opts.program_handle = gc_program_handle_from_address(&gcell_all); + opts.nspes = 1; + gc_job_manager_sptr mgr = gc_make_job_manager(&opts); + +#if 1 + for (int log2_fft_size = 5; log2_fft_size <= 12; log2_fft_size++){ + test(mgr, log2_fft_size, true); + } +#else + test(mgr, 5, true); +#endif +} + +// test reverse FFT +void +qa_gcp_fft_1d_r2::t2() +{ + gc_jm_options opts; + opts.program_handle = gc_program_handle_from_address(&gcell_all); + opts.nspes = 1; + gc_job_manager_sptr mgr = gc_make_job_manager(&opts); + +#if 1 + for (int log2_fft_size = 5; log2_fft_size <= 12; log2_fft_size++){ + test(mgr, log2_fft_size, false); + } +#else + test(mgr, 5, false); +#endif +} + +void +qa_gcp_fft_1d_r2::t3() +{ +} + +void +qa_gcp_fft_1d_r2::t4() +{ +} + +static inline float +abs_diff(std::complex<float> x, std::complex<float> y) +{ + return std::max(std::abs(x.real()-y.real()), + std::abs(x.imag()-y.imag())); +} + +static float +float_abs_rel_error(float ref, float actual) +{ + float delta = ref - actual; + if (std::abs(ref) < 1e-18) + ref = 1e-18; + return std::abs(delta/ref); +} + +static float +abs_rel_error(std::complex<float> ref, std::complex<float> actual) +{ + return std::max(float_abs_rel_error(ref.real(), actual.real()), + float_abs_rel_error(ref.imag(), actual.imag())); +} + +void +qa_gcp_fft_1d_r2::test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward) +{ + int fft_size = 1 << log2_fft_size; + + // allocate aligned buffers with boost shared_ptr's + void_sptr fftw_in_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128); + void_sptr fftw_out_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128); + void_sptr cell_in_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128); + void_sptr cell_out_void = aligned_alloc_sptr(fft_size * sizeof(std::complex<float>), 128); + void_sptr cell_twiddle_void = aligned_alloc_sptr(fft_size/4 * sizeof(std::complex<float>), 128); + + // cast them to the type we really want + std::complex<float> *fftw_in = (std::complex<float> *) fftw_in_void.get(); + std::complex<float> *fftw_out = (std::complex<float> *) fftw_out_void.get(); + std::complex<float> *cell_in = (std::complex<float> *) cell_in_void.get(); + std::complex<float> *cell_out = (std::complex<float> *) cell_out_void.get(); + std::complex<float> *cell_twiddle = (std::complex<float> *) cell_twiddle_void.get(); + + if (forward) + gcp_fft_1d_r2_forward_twiddle(log2_fft_size, cell_twiddle); + else + gcp_fft_1d_r2_reverse_twiddle(log2_fft_size, cell_twiddle); + + srandom(1); // we want reproducibility + + // initialize the input buffers + for (int i = 0; i < fft_size; i++){ + std::complex<float> t((float) (random() & 0xfffff), (float) (random() & 0xfffff)); + fftw_in[i] = t; + cell_in[i] = t; + } + + // ------------------------------------------------------------------------ + // compute the reference answer + fftwf_plan plan = fftwf_plan_dft_1d (fft_size, + reinterpret_cast<fftwf_complex *>(fftw_in), + reinterpret_cast<fftwf_complex *>(fftw_out), + forward ? FFTW_FORWARD : FFTW_BACKWARD, + FFTW_ESTIMATE); + if (plan == 0){ + fprintf(stderr, "qa_gcp_fft_1d_r2: error creating FFTW plan\n"); + throw std::runtime_error ("fftwf_plan_dft_r2c_1d failed"); + } + + fftwf_execute(plan); + fftwf_destroy_plan(plan); + + // ------------------------------------------------------------------------ + // compute the answer on the cell + gc_job_desc *jd = gcp_fft_1d_r2_submit(mgr, log2_fft_size, forward, + cell_out, cell_in, cell_twiddle); + if (!mgr->wait_job(jd)){ + fprintf(stderr, "wait_job failed: %s\n", gc_job_status_string(jd->status).c_str()); + mgr->free_job_desc(jd); + CPPUNIT_ASSERT(0); + } + mgr->free_job_desc(jd); + + // ------------------------------------------------------------------------ + // compute the maximum of the relative error + float max_rel = 0.0; + for (int i = 0; i < fft_size; i++){ + max_rel = std::max(max_rel, abs_rel_error(fftw_out[i], cell_out[i])); + if (0) + printf("(%16.3f, %16.3fj) (%16.3f, %16.3fj) (%16.3f, %16.3fj)\n", + fftw_out[i].real(), fftw_out[i].imag(), + cell_out[i].real(), cell_out[i].imag(), + fftw_out[i].real() - cell_out[i].real(), + fftw_out[i].imag() - cell_out[i].imag()); + } + + fprintf(stdout, "%s fft_size = %4d max_rel_error = %e\n", + forward ? "fwd" : "rev", fft_size, max_rel); + + // CPPUNIT_ASSERT(max_rel <= 1e-4); + +} diff --git a/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h new file mode 100644 index 000000000..38beafb21 --- /dev/null +++ b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_QA_GCP_FFT_1D_R2_H +#define INCLUDED_QA_GCP_FFT_1D_R2_H + +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/TestCase.h> +#include <gc_job_manager.h> + +class qa_gcp_fft_1d_r2 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE(qa_gcp_fft_1d_r2); + CPPUNIT_TEST(t1); + CPPUNIT_TEST(t2); + CPPUNIT_TEST(t3); + CPPUNIT_TEST(t4); + CPPUNIT_TEST_SUITE_END(); + + private: + void t1(); + void t2(); + void t3(); + void t4(); + + void test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward); +}; + + + +#endif /* INCLUDED_QA_GCP_FFT_1D_R2_H */ diff --git a/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c b/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c new file mode 100644 index 000000000..36bd878ed --- /dev/null +++ b/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c @@ -0,0 +1,39 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <gc_declare_proc.h> +#include <libfft.h> + +static void +gcs_fft_1d_r2(const gc_job_direct_args_t *input, + gc_job_direct_args_t *output __attribute__((unused)), + const gc_job_ea_args_t *eaa) +{ + vector float *out = (vector float *) eaa->arg[0].ls_addr; + vector float *in = (vector float *) eaa->arg[1].ls_addr; + vector float *W = (vector float *) eaa->arg[2].ls_addr; + int log2_fft_length = input->arg[0].u32; + int forward = input->arg[1].u32; // non-zero if forward xform (FIXME use) + + fft_1d_r2(out, in, W, log2_fft_length); +} + +GC_DECLARE_PROC(gcs_fft_1d_r2, "fft_1d_r2"); |