From 8f2a5f3837da61a4d49251ee65f91f0d2e8e48de Mon Sep 17 00:00:00 2001 From: eb Date: Wed, 16 Apr 2008 03:48:33 +0000 Subject: Merged gcell-wip -r8159:8202 into trunk. This includes the following changes: * gc_make_job_manager now returns a boost::shared_ptr * opts.program_handle is now a boost::shared_ptr * two new functions for getting a program handle * look_proc and alloc_job_desc now throw on error * static methods for setting and getting a single job manager * new exception hierarchy * mv gcell/src/lib/procs gcell/src/lib/wrapper * added libfft. Currently inverse xform is broken * gcell-embedspu-libtool creates libtool complaint .ko's from SPE executables git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@8209 221aa14e-8319-0410-a670-987f0aec2ac5 --- gcell/src/lib/general/spu/fft_1d.h | 103 +++++++ gcell/src/lib/general/spu/fft_1d_r2.c | 35 +++ gcell/src/lib/general/spu/fft_1d_r2.h | 529 ++++++++++++++++++++++++++++++++++ gcell/src/lib/general/spu/libfft.h | 113 ++++++++ 4 files changed, 780 insertions(+) create mode 100644 gcell/src/lib/general/spu/fft_1d.h create mode 100644 gcell/src/lib/general/spu/fft_1d_r2.c create mode 100644 gcell/src/lib/general/spu/fft_1d_r2.h create mode 100644 gcell/src/lib/general/spu/libfft.h (limited to 'gcell/src/lib/general/spu') diff --git a/gcell/src/lib/general/spu/fft_1d.h b/gcell/src/lib/general/spu/fft_1d.h new file mode 100644 index 000000000..355b84bf1 --- /dev/null +++ b/gcell/src/lib/general/spu/fft_1d.h @@ -0,0 +1,103 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _FFT_1D_H_ +#define _FFT_1D_H_ 1 + +#include + +/* BIT_SWAP - swaps up to 16 bits of the integer _i according to the + * pattern specified by _pat. + */ +#define BIT_SWAP(_i, _pat) spu_extract(spu_gather(spu_shuffle(spu_maskb(_i), _pat, _pat)), 0) + + +#ifndef MAX_FFT_1D_SIZE +#define MAX_FFT_1D_SIZE 8192 +#endif + +#ifndef INV_SQRT_2 +#define INV_SQRT_2 0.7071067811865 +#endif + + +/* The following macro, FFT_1D_BUTTERFLY, performs a 4 way SIMD basic butterfly + * operation. The inputs are in parallel arrays (seperate real and imaginary + * vectors). + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + */ + +#define FFT_1D_BUTTERFLY(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \ + vector float _qw_re, _qw_im; \ + \ + _qw_re = spu_msub(_q_re, _W_re, spu_mul(_q_im, _W_im)); \ + _qw_im = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \ + _P_re = spu_add(_p_re, _qw_re); \ + _P_im = spu_add(_p_im, _qw_im); \ + _Q_re = spu_sub(_p_re, _qw_re); \ + _Q_im = spu_sub(_p_im, _qw_im); \ +} + + +/* FFT_1D_BUTTERFLY_HI is equivalent to FFT_1D_BUTTERFLY with twiddle factors (W_im, -W_re) + */ +#define FFT_1D_BUTTERFLY_HI(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \ + vector float _qw_re, _qw_im; \ + \ + _qw_re = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \ + _qw_im = spu_msub(_q_im, _W_im, spu_mul(_q_re, _W_re)); \ + _P_re = spu_add(_p_re, _qw_re); \ + _P_im = spu_add(_p_im, _qw_im); \ + _Q_re = spu_sub(_p_re, _qw_re); \ + _Q_im = spu_sub(_p_im, _qw_im); \ +} + +#endif /* _FFT_1D_H_ */ diff --git a/gcell/src/lib/general/spu/fft_1d_r2.c b/gcell/src/lib/general/spu/fft_1d_r2.c new file mode 100644 index 000000000..a0660b307 --- /dev/null +++ b/gcell/src/lib/general/spu/fft_1d_r2.c @@ -0,0 +1,35 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include + +/* + * invoke the inline version + */ +void +fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size) +{ + assert((1 << log2_size) <= MAX_FFT_1D_SIZE); + + _fft_1d_r2(out, in, W, log2_size); +} diff --git a/gcell/src/lib/general/spu/fft_1d_r2.h b/gcell/src/lib/general/spu/fft_1d_r2.h new file mode 100644 index 000000000..a51cbc341 --- /dev/null +++ b/gcell/src/lib/general/spu/fft_1d_r2.h @@ -0,0 +1,529 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _FFT_1D_R2_H_ +#define _FFT_1D_R2_H_ 1 + +#include "fft_1d.h" + +/* fft_1d_r2 + * --------- + * Performs a single precision, complex Fast Fourier Transform using + * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. + * The input is an array of complex numbers of length (1<. Note: This routine can support an in-place transformation + * by specifying and to be the same array. + * + * This implementation utilizes the Cooley-Tukey algorithm consisting + * of stages. The basic operation is the butterfly. + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + * + * This routine also requires pre-computed twiddle values, W. W is an + * array of single precision complex numbers of length 1<<(log2_size-2) + * and is computed as follows: + * + * for (i=0; i> 1; + n_4 = n >> 2; + n_8 = n >> 3; + n_16 = n >> 4; + + n_3_16 = n_8 + n_16; + + /* Compute a byte reverse shuffle pattern to be used to produce + * an address bit swap. + */ + reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size), + spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8, + 7, 6, 5, 4, 3, 2,1,0}), + log2_size-16)); + + /* Perform the first 3 stages of the FFT. These stages differs from + * other stages in that the inputs are unscrambled and the data is + * reformated into parallel arrays (ie, seperate real and imaginary + * arrays). The term "unscramble" means the bit address reverse the + * data array. In addition, the first three stages have simple twiddle + * weighting factors. + * stage 1: (1, 0) + * stage 2: (1, 0) and (0, -1) + * stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707) + * + * The arrays are processed as two halves, simultaneously. The lo (first + * half) and hi (second half). This is done because the scramble + * shares source value between each half of the output arrays. + */ + i = 0; + i_rev = 0; + + in0 = in; + in1 = in + n_8; + in2 = in + n_16; + in3 = in + n_3_16; + + in4 = in + n_4; + in5 = in1 + n_4; + in6 = in2 + n_4; + in7 = in3 + n_4; + + re0 = re; + re1 = re + n_8; + im0 = im; + im1 = im + n_8; + + w0_re = (vector float) { 1.0f, INV_SQRT_2, 0.0f, -INV_SQRT_2}; + w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2}; + + do { + src_lo0 = in0[i_rev]; + src_lo1 = in1[i_rev]; + src_lo2 = in2[i_rev]; + src_lo3 = in3[i_rev]; + + src_hi0 = in4[i_rev]; + src_hi1 = in5[i_rev]; + src_hi2 = in6[i_rev]; + src_hi3 = in7[i_rev]; + + /* Perform scramble. + */ + dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo); + dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi); + dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo); + dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi); + dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo); + dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi); + dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo); + dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi); + + /* Perform the stage 1 butterfly. The multiplier constant, ppmm, + * is used to control the sign of the operands since a single + * quadword contains both of P and Q valule of the butterfly. + */ + pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8)); + pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8)); + pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8)); + pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8)); + pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8)); + pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8)); + pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8)); + pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8)); + + /* Perfrom the stage 2 butterfly. For this stage, the + * inputs pq are still interleaved (p.real, p.imag, q.real, + * q.imag), so we must first re-order the data into + * parallel arrays as well as perform the reorder + * associated with the twiddle W[n/4], which equals + * (0, -1). + * + * ie. (A, B) * (0, -1) => (B, -A) + */ + re_lo0 = spu_madd(ppmm, + spu_shuffle(pq_lo1, pq_lo1, shuf_0303), + spu_shuffle(pq_lo0, pq_lo0, shuf_0202)); + im_lo0 = spu_madd(pmmp, + spu_shuffle(pq_lo1, pq_lo1, shuf_1212), + spu_shuffle(pq_lo0, pq_lo0, shuf_1313)); + + re_lo1 = spu_madd(ppmm, + spu_shuffle(pq_lo3, pq_lo3, shuf_0303), + spu_shuffle(pq_lo2, pq_lo2, shuf_0202)); + im_lo1 = spu_madd(pmmp, + spu_shuffle(pq_lo3, pq_lo3, shuf_1212), + spu_shuffle(pq_lo2, pq_lo2, shuf_1313)); + + + re_hi0 = spu_madd(ppmm, + spu_shuffle(pq_hi1, pq_hi1, shuf_0303), + spu_shuffle(pq_hi0, pq_hi0, shuf_0202)); + im_hi0 = spu_madd(pmmp, + spu_shuffle(pq_hi1, pq_hi1, shuf_1212), + spu_shuffle(pq_hi0, pq_hi0, shuf_1313)); + + re_hi1 = spu_madd(ppmm, + spu_shuffle(pq_hi3, pq_hi3, shuf_0303), + spu_shuffle(pq_hi2, pq_hi2, shuf_0202)); + im_hi1 = spu_madd(pmmp, + spu_shuffle(pq_hi3, pq_hi3, shuf_1212), + spu_shuffle(pq_hi2, pq_hi2, shuf_1313)); + + + /* Perform stage 3 butterfly. + */ + FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + re0 += 2; + re1 += 2; + im0 += 2; + im1 += 2; + + i += 8; + i_rev = BIT_SWAP(i, reverse) / 2; + } while (i < n_2); + + /* Process stages 4 to log2_size-2 + */ + for (stage=4, stride=4; stage> stage; + w_2stride = n >> stage; + w_3stride = w_stride + w_2stride; + w_4stride = w_2stride + w_2stride; + + W0 = W; + W1 = W + w_stride; + W2 = W + w_2stride; + W3 = W + w_3stride; + + stride_2 = stride >> 1; + stride_4 = stride >> 2; + stride_3_4 = stride_2 + stride_4; + + re0 = re; im0 = im; + re1 = re + stride_2; im1 = im + stride_2; + re2 = re + stride_4; im2 = im + stride_4; + re3 = re + stride_3_4; im3 = im + stride_3_4; + + for (i=0, offset=0; i> stage; + w_2stride = n >> stage; + w_3stride = w_stride + w_2stride; + w_4stride = w_2stride + w_2stride; + + stride_2 = stride >> 1; + stride_4 = stride >> 2; + + stride_3_4 = stride_2 + stride_4; + + re0 = re; im0 = im; + re1 = re + stride_2; im1 = im + stride_2; + re2 = re + stride_4; im2 = im + stride_4; + re3 = re + stride_3_4; im3 = im + stride_3_4; + + for (i=0, offset=0; i. + * + * This loop has been manually unrolled by 2 to improve + * dual issue rates and reduce stalls. This unrolling + * forces a minimum FFT size of 32. + */ + re0 = re; + re1 = re + n_8; + re2 = re + n_16; + re3 = re + n_3_16; + + im0 = im; + im1 = im + n_8; + im2 = im + n_16; + im3 = im + n_3_16; + + out0 = out; + out1 = out + n_4; + out2 = out + n_8; + out3 = out1 + n_8; + + i = n_16; + + do { + /* Fetch the twiddle factors + */ + w0 = W[0]; + w1 = W[1]; + w2 = W[2]; + w3 = W[3]; + + W += 4; + + w0_re = spu_shuffle(w0, w1, shuf_0246); + w0_im = spu_shuffle(w0, w1, shuf_1357); + w1_re = spu_shuffle(w2, w3, shuf_0246); + w1_im = spu_shuffle(w2, w3, shuf_1357); + + /* Fetch the butterfly inputs, reals and imaginaries + */ + re_lo0 = re0[0]; im_lo0 = im0[0]; + re_lo1 = re1[0]; im_lo1 = im1[0]; + re_lo2 = re0[1]; im_lo2 = im0[1]; + re_lo3 = re1[1]; im_lo3 = im1[1]; + + re_hi0 = re2[0]; im_hi0 = im2[0]; + re_hi1 = re3[0]; im_hi1 = im3[0]; + re_hi2 = re2[1]; im_hi2 = im2[1]; + re_hi3 = re3[1]; im_hi3 = im3[1]; + + re0 += 2; im0 += 2; + re1 += 2; im1 += 2; + re2 += 2; im2 += 2; + re3 += 2; im3 += 2; + + /* Perform the butterflys + */ + FFT_1D_BUTTERFLY (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im); + + FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im); + + /* Interleave the results and store them into the output buffers (ie, + * the original input buffers. + */ + out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415); + out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637); + out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415); + out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637); + + out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415); + out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637); + out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415); + out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637); + + out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415); + out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637); + out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415); + out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637); + + out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415); + out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637); + out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415); + out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637); + + out0 += 4; + out1 += 4; + out2 += 4; + out3 += 4; + + i -= 2; + } while (i); +} + +#endif /* _FFT_1D_R2_H_ */ diff --git a/gcell/src/lib/general/spu/libfft.h b/gcell/src/lib/general/spu/libfft.h new file mode 100644 index 000000000..dd387be0c --- /dev/null +++ b/gcell/src/lib/general/spu/libfft.h @@ -0,0 +1,113 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2008 Free Software Foundation, Inc. */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ + +#ifndef INCLUDED_LIBFFT_H +#define INCLUDED_LIBFFT_H + +// must be defined before inclusion of fft_1d_r2.h +#define MAX_FFT_1D_SIZE 4096 + +/* fft_1d_r2 + * --------- + * Performs a single precision, complex Fast Fourier Transform using + * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. + * The input is an array of complex numbers of length (1<. Note: This routine can support an in-place transformation + * by specifying and to be the same array. + * + * This implementation utilizes the Cooley-Tukey algorithm consisting + * of stages. The basic operation is the butterfly. + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + * + * This routine also requires pre-computed twiddle values, W. W is an + * array of single precision complex numbers of length 1<<(log2_size-2) + * and is computed as follows: + * + * for (i=0; i