diff options
author | Tom Rondeau | 2010-03-13 16:23:34 -0500 |
---|---|---|
committer | Tom Rondeau | 2010-03-13 16:23:34 -0500 |
commit | c7b26f667de792209225b8244e92842b2399368c (patch) | |
tree | ea501fb89476a62a11e41b5b96871b414fcff499 | |
parent | 1933148ce1c78a81b1299c05d540a77b31325d92 (diff) | |
download | gnuradio-c7b26f667de792209225b8244e92842b2399368c.tar.gz gnuradio-c7b26f667de792209225b8244e92842b2399368c.tar.bz2 gnuradio-c7b26f667de792209225b8244e92842b2399368c.zip |
Now have a FFT filter implemented in SSE and generic version that can be switched into FFT filter block.
8 files changed, 555 insertions, 1 deletions
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc index ad514ee37..3c92c6a25 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc @@ -31,6 +31,7 @@ #include <gr_fft_filter_ccc.h> #include <gri_fft_filter_ccc_sse.h> +#include <gri_fft_filter_ccc_generic.h> #include <gr_io_signature.h> #include <gri_fft.h> #include <math.h> @@ -56,7 +57,11 @@ gr_fft_filter_ccc::gr_fft_filter_ccc (int decimation, const std::vector<gr_compl d_updated(false) { set_history(1); +#if 0 + d_filter = new gri_fft_filter_ccc_generic(decimation, taps); +#else d_filter = new gri_fft_filter_ccc_sse(decimation, taps); +#endif d_nsamples = d_filter->set_taps(taps); set_output_multiple(d_nsamples); } diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h index 18c10dfd3..c037edebe 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h @@ -29,6 +29,7 @@ typedef boost::shared_ptr<gr_fft_filter_ccc> gr_fft_filter_ccc_sptr; gr_fft_filter_ccc_sptr gr_make_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps); class gri_fft_filter_ccc_sse; +class gri_fft_filter_ccc_generic; /*! * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps @@ -41,7 +42,11 @@ class gr_fft_filter_ccc : public gr_sync_decimator int d_nsamples; bool d_updated; +#if 0 + gri_fft_filter_ccc_generic *d_filter; +#else gri_fft_filter_ccc_sse *d_filter; +#endif std::vector<gr_complex> d_new_taps; /*! diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc index eda3d2147..914744e8e 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc @@ -26,6 +26,7 @@ #include <gr_fft_filter_fff.h> #include <gri_fft_filter_fff_generic.h> +#include <gri_fft_filter_fff_sse.h> #include <gr_io_signature.h> #include <assert.h> #include <stdexcept> @@ -48,8 +49,13 @@ gr_fft_filter_fff::gr_fft_filter_fff (int decimation, const std::vector<float> & d_updated(false) { set_history(1); + +#if 1 + d_filter = new gri_fft_filter_fff_generic(decimation, taps); +#else + d_filter = new gri_fft_filter_fff_sse(decimation, taps); +#endif - d_filter = new gri_fft_filter_fff_generic(decimation, taps); d_nsamples = d_filter->set_taps(taps); set_output_multiple(d_nsamples); } diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h index 6c9fcc04b..2059d704b 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h @@ -29,6 +29,7 @@ typedef boost::shared_ptr<gr_fft_filter_fff> gr_fft_filter_fff_sptr; gr_fft_filter_fff_sptr gr_make_fft_filter_fff (int decimation, const std::vector<float> &taps); class gri_fft_filter_fff_generic; +class gri_fft_filter_fff_sse; /*! * \brief Fast FFT filter with float input, float output and float taps @@ -41,7 +42,11 @@ class gr_fft_filter_fff : public gr_sync_decimator int d_nsamples; bool d_updated; +#if 1 gri_fft_filter_fff_generic *d_filter; +#else + gri_fft_filter_fff_sse *d_filter; +#endif std::vector<float> d_new_taps; /*! diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc new file mode 100644 index 000000000..b7d925ff3 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc @@ -0,0 +1,186 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <gri_fft_filter_ccc_sse.h> +#include <gri_fft.h> +#include <assert.h> +#include <stdexcept> +#include <cstdio> +#include <xmmintrin.h> +#include <fftw3.h> + +gri_fft_filter_ccc_sse::gri_fft_filter_ccc_sse (int decimation, + const std::vector<gr_complex> &taps) + : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0) +{ + d_xformed_taps = (gr_complex*)fftwf_malloc(1*sizeof(gr_complex)); + set_taps(taps); +} + +gri_fft_filter_ccc_sse::~gri_fft_filter_ccc_sse () +{ + fftwf_free(d_xformed_taps); + delete d_fwdfft; + delete d_invfft; +} + +#if 0 +static void +print_vector_complex(const std::string label, const std::vector<gr_complex> &x) +{ + std::cout << label; + for (unsigned i = 0; i < x.size(); i++) + std::cout << x[i] << " "; + std::cout << "\n"; +} +#endif + + +/* + * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps + */ +int +gri_fft_filter_ccc_sse::set_taps (const std::vector<gr_complex> &taps) +{ + int i = 0; + compute_sizes(taps.size()); + + d_tail.resize(tailsize()); + for (i = 0; i < tailsize(); i++) + d_tail[i] = 0; + + gr_complex *in = d_fwdfft->get_inbuf(); + gr_complex *out = d_fwdfft->get_outbuf(); + + float scale = 1.0 / d_fftsize; + + // Compute forward xform of taps. + // Copy taps into first ntaps slots, then pad with zeros + for (i = 0; i < d_ntaps; i++) + in[i] = taps[i] * scale; + + for (; i < d_fftsize; i++) + in[i] = 0; + + d_fwdfft->execute(); // do the xform + + // now copy output to d_xformed_taps + for (i = 0; i < d_fftsize; i++) + d_xformed_taps[i] = out[i]; + + return d_nsamples; +} + +// determine and set d_ntaps, d_nsamples, d_fftsize + +void +gri_fft_filter_ccc_sse::compute_sizes(int ntaps) +{ + int old_fftsize = d_fftsize; + d_ntaps = ntaps; + d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2)))); + d_nsamples = d_fftsize - d_ntaps + 1; + + if (0) + fprintf(stderr, "gri_fft_filter_ccc_sse: ntaps = %d, fftsize = %d, nsamples = %d\n", + d_ntaps, d_fftsize, d_nsamples); + + assert(d_fftsize == d_ntaps + d_nsamples -1 ); + + if (d_fftsize != old_fftsize){ // compute new plans + delete d_fwdfft; + delete d_invfft; + d_fwdfft = new gri_fft_complex(d_fftsize, true); + d_invfft = new gri_fft_complex(d_fftsize, false); + + fftwf_free(d_xformed_taps); + d_xformed_taps = (gr_complex*)fftwf_malloc((d_fftsize)*sizeof(gr_complex)); + } +} + +int +gri_fft_filter_ccc_sse::filter (int nitems, const gr_complex *input, gr_complex *output) +{ + int dec_ctr = 0; + int j = 0; + int ninput_items = nitems * d_decimation; + + for (int i = 0; i < ninput_items; i += d_nsamples){ + + memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(gr_complex)); + + for (j = d_nsamples; j < d_fftsize; j++) + d_fwdfft->get_inbuf()[j] = 0; + + d_fwdfft->execute(); // compute fwd xform + + float *a = (float*)(d_fwdfft->get_outbuf()); + float *b = (float*)(&d_xformed_taps[0]); + float *c = (float*)(d_invfft->get_inbuf()); + + __m128 x0, x1, x2, t0, t1, m; + m = _mm_set_ps(-1, 1, -1, 1); + for (j = 0; j < 2*d_fftsize; j+=4) { // filter in the freq domain + x0 = _mm_load_ps(&a[j]); + t0 = _mm_load_ps(&b[j]); + + t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1)); + t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0)); + t1 = _mm_mul_ps(t1, m); + + x1 = _mm_mul_ps(x0, t0); + x2 = _mm_mul_ps(x0, t1); + + x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1)); + x2 = _mm_add_ps(x1, x2); + + _mm_store_ps(&c[j], x2); + } + + d_invfft->execute(); // compute inv xform + + // add in the overlapping tail + + for (j = 0; j < tailsize(); j++) + d_invfft->get_outbuf()[j] += d_tail[j]; + + // copy nsamples to output + j = dec_ctr; + while (j < d_nsamples) { + *output++ = d_invfft->get_outbuf()[j]; + j += d_decimation; + } + dec_ctr = (j - d_nsamples); + + // stash the tail + memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples, + tailsize() * sizeof(gr_complex)); + } + + assert(dec_ctr == 0); + + return nitems; +} diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h new file mode 100644 index 000000000..d1c54f01f --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h @@ -0,0 +1,82 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GRI_FFT_FILTER_CCC_SSE_H +#define INCLUDED_GRI_FFT_FILTER_CCC_SSE_H + +#include <gr_complex.h> +#include <vector> + +class gri_fft_complex; + +/*! + * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps + * \ingroup filter_blk + */ +class gri_fft_filter_ccc_sse +{ + private: + int d_ntaps; + int d_nsamples; + int d_fftsize; // fftsize = ntaps + nsamples - 1 + int d_decimation; + gri_fft_complex *d_fwdfft; // forward "plan" + gri_fft_complex *d_invfft; // inverse "plan" + std::vector<gr_complex> d_tail; // state carried between blocks for overlap-add + gr_complex *d_xformed_taps; + std::vector<gr_complex> d_new_taps; + + void compute_sizes(int ntaps); + int tailsize() const { return d_ntaps - 1; } + + public: + /*! + * \brief Construct an FFT filter for complex vectors with the given taps and decimation rate. + * + * This is the basic implementation for performing FFT filter for fast convolution + * in other blocks for complex vectors (such as gr_fft_filter_ccc). + * \param decimation The decimation rate of the filter (int) + * \param taps The filter taps (complex) + */ + gri_fft_filter_ccc_sse (int decimation, const std::vector<gr_complex> &taps); + ~gri_fft_filter_ccc_sse (); + + /*! + * \brief Set new taps for the filter. + * + * Sets new taps and resets the class properties to handle different sizes + * \param taps The filter taps (complex) + */ + int set_taps (const std::vector<gr_complex> &taps); + + /*! + * \brief Perform the filter operation + * + * \param nitems The number of items to produce + * \param input The input vector to be filtered + * \param output The result of the filter operation + */ + int filter (int nitems, const gr_complex *input, gr_complex *output); + +}; + +#endif /* INCLUDED_GRI_FFT_FILTER_CCC_SSE_H */ diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc new file mode 100644 index 000000000..2680e6594 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc @@ -0,0 +1,184 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <gri_fft_filter_fff_sse.h> +#include <gri_fft.h> +#include <assert.h> +#include <stdexcept> +#include <cstdio> +#include <xmmintrin.h> +#include <fftw3.h> + +gri_fft_filter_fff_sse::gri_fft_filter_fff_sse (int decimation, + const std::vector<float> &taps) + : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0) +{ + d_xformed_taps = (gr_complex*)fftwf_malloc(1*sizeof(gr_complex)); + set_taps(taps); +} + +gri_fft_filter_fff_sse::~gri_fft_filter_fff_sse () +{ + fftwf_free(d_xformed_taps); + delete d_fwdfft; + delete d_invfft; +} + +/* + * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps + */ +int +gri_fft_filter_fff_sse::set_taps (const std::vector<float> &taps) +{ + int i = 0; + compute_sizes(taps.size()); + + d_tail.resize(tailsize()); + for (i = 0; i < tailsize(); i++) + d_tail[i] = 0; + + float *in = d_fwdfft->get_inbuf(); + gr_complex *out = d_fwdfft->get_outbuf(); + + float scale = 1.0 / d_fftsize; + + // Compute forward xform of taps. + // Copy taps into first ntaps slots, then pad with zeros + for (i = 0; i < d_ntaps; i++) + in[i] = taps[i] * scale; + + for (; i < d_fftsize; i++) + in[i] = 0; + + d_fwdfft->execute(); // do the xform + + // now copy output to d_xformed_taps + for (i = 0; i < d_fftsize/2+1; i++) + d_xformed_taps[i] = out[i]; + + return d_nsamples; +} + +// determine and set d_ntaps, d_nsamples, d_fftsize + +void +gri_fft_filter_fff_sse::compute_sizes(int ntaps) +{ + int old_fftsize = d_fftsize; + d_ntaps = ntaps; + d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2)))); + d_nsamples = d_fftsize - d_ntaps + 1; + + if (0) + fprintf(stderr, "gri_fft_filter_fff_sse: ntaps = %d, fftsize = %d, nsamples = %d\n", + d_ntaps, d_fftsize, d_nsamples); + + assert(d_fftsize == d_ntaps + d_nsamples -1 ); + + if (d_fftsize != old_fftsize){ // compute new plans + delete d_fwdfft; + delete d_invfft; + d_fwdfft = new gri_fft_real_fwd(d_fftsize); + d_invfft = new gri_fft_real_rev(d_fftsize); + //d_xformed_taps.resize(d_fftsize/2+1); + + fftwf_free(d_xformed_taps); + d_xformed_taps = (gr_complex*)fftwf_malloc((d_fftsize/2+1)*sizeof(gr_complex)); + } +} + +int +gri_fft_filter_fff_sse::filter (int nitems, const float *input, float *output) +{ + int dec_ctr = 0; + int j = 0; + int ninput_items = nitems * d_decimation; + + for (int i = 0; i < ninput_items; i += d_nsamples){ + + memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(float)); + + for (j = d_nsamples; j < d_fftsize; j++) + d_fwdfft->get_inbuf()[j] = 0; + + d_fwdfft->execute(); // compute fwd xform + + float *a = (float*)(d_fwdfft->get_outbuf()); + float *b = (float*)(&d_xformed_taps[0]); + float *c = (float*)(d_invfft->get_inbuf()); + + __m128 x0, x1, x2, t0, t1, m; + m = _mm_set_ps(-1, 1, -1, 1); + for (j = 0; j < d_fftsize; j+=4) { // filter in the freq domain + x0 = _mm_load_ps(&a[j]); + t0 = _mm_load_ps(&b[j]); + + t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1)); + t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0)); + t1 = _mm_mul_ps(t1, m); + + x1 = _mm_mul_ps(x0, t0); + x2 = _mm_mul_ps(x0, t1); + + x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1)); + x2 = _mm_add_ps(x1, x2); + + _mm_store_ps(&c[j], x2); + } + + // Finish off the last one; do the complex multiply as floats + j = d_fftsize/2; + c[j] = (a[j] * b[j]) - (a[j+1] * b[j+1]); + c[j+1] = (a[j] * b[j+1]) + (a[j+1] * b[j]); + + d_invfft->execute(); // compute inv xform + + // add in the overlapping tail + + for (j = 0; j < tailsize(); j++) + d_invfft->get_outbuf()[j] += d_tail[j]; + + // copy nsamples to output + + //memcpy(out, d_invfft->get_outbuf(), d_nsamples * sizeof(float)); + //out += d_nsamples; + + j = dec_ctr; + while (j < d_nsamples) { + *output++ = d_invfft->get_outbuf()[j]; + j += d_decimation; + } + dec_ctr = (j - d_nsamples); + + // stash the tail + memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples, + tailsize() * sizeof(float)); + } + + assert(dec_ctr == 0); + + return nitems; +} diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h new file mode 100644 index 000000000..8258bb824 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h @@ -0,0 +1,81 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GRI_FFT_FILTER_FFF_SSE_H +#define INCLUDED_GRI_FFT_FILTER_FFF_SSE_H + +#include <gr_complex.h> +#include <vector> + +class gri_fft_real_fwd; +class gri_fft_real_rev; + +class gri_fft_filter_fff_sse +{ + private: + int d_ntaps; + int d_nsamples; + int d_fftsize; // fftsize = ntaps + nsamples - 1 + int d_decimation; + gri_fft_real_fwd *d_fwdfft; // forward "plan" + gri_fft_real_rev *d_invfft; // inverse "plan" + std::vector<float> d_tail; // state carried between blocks for overlap-add + //std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps + gr_complex *d_xformed_taps; + std::vector<float> d_new_taps; + + + void compute_sizes(int ntaps); + int tailsize() const { return d_ntaps - 1; } + + public: + /*! + * \brief Construct a FFT filter for float vectors with the given taps and decimation rate. + * + * This is the basic implementation for performing FFT filter for fast convolution + * in other blocks for floating point vectors (such as gr_fft_filter_fff). + * \param decimation The decimation rate of the filter (int) + * \param taps The filter taps (float) + */ + gri_fft_filter_fff_sse (int decimation, const std::vector<float> &taps); + ~gri_fft_filter_fff_sse (); + + /*! + * \brief Set new taps for the filter. + * + * Sets new taps and resets the class properties to handle different sizes + * \param taps The filter taps (float) + */ + int set_taps (const std::vector<float> &taps); + + /*! + * \brief Perform the filter operation + * + * \param nitems The number of items to produce + * \param input The input vector to be filtered + * \param output The result of the filter operation + */ + int filter (int nitems, const float *input, float *output); + +}; + +#endif /* INCLUDED_GRI_FFT_FILTER_FFF_SSE_H */ |