diff options
-rw-r--r-- | gnuradio-core/src/lib/filter/Makefile.am | 4 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc | 143 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h | 21 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc | 157 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gr_fft_filter_fff.h | 22 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc | 166 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h | 82 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc | 186 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h | 82 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc | 157 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h | 80 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc | 184 | ||||
-rw-r--r-- | gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h | 81 |
13 files changed, 1073 insertions, 292 deletions
diff --git a/gnuradio-core/src/lib/filter/Makefile.am b/gnuradio-core/src/lib/filter/Makefile.am index 9cd6e9f38..23c1dadc3 100644 --- a/gnuradio-core/src/lib/filter/Makefile.am +++ b/gnuradio-core/src/lib/filter/Makefile.am @@ -184,6 +184,8 @@ libfilter_la_common_SOURCES = \ $(GENERATED_CC) \ gr_adaptive_fir_ccf.cc \ gr_cma_equalizer_cc.cc \ + gri_fft_filter_fff_generic.cc \ + gri_fft_filter_ccc_generic.cc \ gr_fft_filter_ccc.cc \ gr_fft_filter_fff.cc \ gr_goertzel_fc.cc \ @@ -259,6 +261,8 @@ grinclude_HEADERS = \ gr_altivec.h \ gr_cma_equalizer_cc.h \ gr_cpu.h \ + gri_fft_filter_fff_generic.h \ + gri_fft_filter_ccc_generic.h \ gr_fft_filter_ccc.h \ gr_fft_filter_fff.h \ gr_filter_delay_fc.h \ diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc index 3dd40d56d..4540c6e4a 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc @@ -30,6 +30,8 @@ #endif #include <gr_fft_filter_ccc.h> +//#include <gri_fft_filter_ccc_sse.h> +#include <gri_fft_filter_ccc_generic.h> #include <gr_io_signature.h> #include <gri_fft.h> #include <math.h> @@ -52,32 +54,23 @@ gr_fft_filter_ccc::gr_fft_filter_ccc (int decimation, const std::vector<gr_compl gr_make_io_signature (1, 1, sizeof (gr_complex)), gr_make_io_signature (1, 1, sizeof (gr_complex)), decimation), - d_fftsize(-1), d_fwdfft(0), d_invfft(0), d_updated(false) + d_updated(false) { - // if (decimation != 1) - // throw std::invalid_argument("gr_fft_filter_ccc: decimation must be 1"); - set_history(1); - actual_set_taps(taps); +#if 1 // don't enable the sse version until handling it is worked out + d_filter = new gri_fft_filter_ccc_generic(decimation, taps); +#else + d_filter = new gri_fft_filter_ccc_sse(decimation, taps); +#endif + d_nsamples = d_filter->set_taps(taps); + set_output_multiple(d_nsamples); } gr_fft_filter_ccc::~gr_fft_filter_ccc () { - delete d_fwdfft; - delete d_invfft; + delete d_filter; } 
-#if 0 -static void -print_vector_complex(const std::string label, const std::vector<gr_complex> &x) -{ - std::cout << label; - for (unsigned i = 0; i < x.size(); i++) - std::cout << x[i] << " "; - std::cout << "\n"; -} -#endif - void gr_fft_filter_ccc::set_taps (const std::vector<gr_complex> &taps) { @@ -85,130 +78,26 @@ gr_fft_filter_ccc::set_taps (const std::vector<gr_complex> &taps) d_updated = true; } -/* - * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps - */ -void -gr_fft_filter_ccc::actual_set_taps (const std::vector<gr_complex> &taps) -{ - int i = 0; - compute_sizes(taps.size()); - - d_tail.resize(tailsize()); - for (i = 0; i < tailsize(); i++) - d_tail[i] = 0; - - gr_complex *in = d_fwdfft->get_inbuf(); - gr_complex *out = d_fwdfft->get_outbuf(); - - float scale = 1.0 / d_fftsize; - - // Compute forward xform of taps. - // Copy taps into first ntaps slots, then pad with zeros - for (i = 0; i < d_ntaps; i++) - in[i] = taps[i] * scale; - - for (; i < d_fftsize; i++) - in[i] = 0; - - d_fwdfft->execute(); // do the xform - - // now copy output to d_xformed_taps - for (i = 0; i < d_fftsize; i++) - d_xformed_taps[i] = out[i]; - - //print_vector_complex("transformed taps:", d_xformed_taps); -} - -// determine and set d_ntaps, d_nsamples, d_fftsize - -void -gr_fft_filter_ccc::compute_sizes(int ntaps) -{ - int old_fftsize = d_fftsize; - d_ntaps = ntaps; - d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2)))); - d_nsamples = d_fftsize - d_ntaps + 1; - - if (0) - fprintf(stderr, "gr_fft_filter: ntaps = %d, fftsize = %d, nsamples = %d\n", - d_ntaps, d_fftsize, d_nsamples); - - assert(d_fftsize == d_ntaps + d_nsamples -1 ); - - if (d_fftsize != old_fftsize){ // compute new plans - delete d_fwdfft; - delete d_invfft; - d_fwdfft = new gri_fft_complex(d_fftsize, true); - d_invfft = new gri_fft_complex(d_fftsize, false); - d_xformed_taps.resize(d_fftsize); - } - - set_output_multiple(d_nsamples); -} - int gr_fft_filter_ccc::work (int noutput_items, 
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) { - gr_complex *in = (gr_complex *) input_items[0]; + const gr_complex *in = (const gr_complex *) input_items[0]; gr_complex *out = (gr_complex *) output_items[0]; if (d_updated){ - actual_set_taps(d_new_taps); + d_nsamples = d_filter->set_taps(d_new_taps); d_updated = false; + set_output_multiple(d_nsamples); return 0; // output multiple may have changed } assert(noutput_items % d_nsamples == 0); - int dec_ctr = 0; - int j = 0; - int ninput_items = noutput_items * decimation(); - - for (int i = 0; i < ninput_items; i += d_nsamples){ - - memcpy(d_fwdfft->get_inbuf(), &in[i], d_nsamples * sizeof(gr_complex)); - - for (j = d_nsamples; j < d_fftsize; j++) - d_fwdfft->get_inbuf()[j] = 0; - - d_fwdfft->execute(); // compute fwd xform - - gr_complex *a = d_fwdfft->get_outbuf(); - gr_complex *b = &d_xformed_taps[0]; - gr_complex *c = d_invfft->get_inbuf(); - - for (j = 0; j < d_fftsize; j++) // filter in the freq domain - c[j] = a[j] * b[j]; - - d_invfft->execute(); // compute inv xform - - // add in the overlapping tail - - for (j = 0; j < tailsize(); j++) - d_invfft->get_outbuf()[j] += d_tail[j]; - - // copy nsamples to output - - //memcpy(out, d_invfft->get_outbuf(), d_nsamples * sizeof(gr_complex)); - //out += d_nsamples; - - j = dec_ctr; - while (j < d_nsamples) { - *out++ = d_invfft->get_outbuf()[j]; - j += decimation(); - } - dec_ctr = (j - d_nsamples); - - // stash the tail - memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples, - tailsize() * sizeof(gr_complex)); - } + d_filter->filter(noutput_items, in, out); - assert((out - (gr_complex *) output_items[0]) == noutput_items); - assert(dec_ctr == 0); + //assert((out - (gr_complex *) output_items[0]) == noutput_items); return noutput_items; } diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h index c5363dcbb..68b19e775 100644 --- 
a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h @@ -28,8 +28,8 @@ class gr_fft_filter_ccc; typedef boost::shared_ptr<gr_fft_filter_ccc> gr_fft_filter_ccc_sptr; gr_fft_filter_ccc_sptr gr_make_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps); -class gr_fir_ccc; -class gri_fft_complex; +//class gri_fft_filter_ccc_sse; +class gri_fft_filter_ccc_generic; /*! * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps @@ -40,15 +40,14 @@ class gr_fft_filter_ccc : public gr_sync_decimator private: friend gr_fft_filter_ccc_sptr gr_make_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps); - int d_ntaps; int d_nsamples; - int d_fftsize; // fftsize = ntaps + nsamples - 1 - gri_fft_complex *d_fwdfft; // forward "plan" - gri_fft_complex *d_invfft; // inverse "plan" - std::vector<gr_complex> d_tail; // state carried between blocks for overlap-add - std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps - std::vector<gr_complex> d_new_taps; bool d_updated; +#if 1 // don't enable the sse version until handling it is worked out + gri_fft_filter_ccc_generic *d_filter; +#else + gri_fft_filter_ccc_sse *d_filter; +#endif + std::vector<gr_complex> d_new_taps; /*! 
* Construct a FFT filter with the given taps @@ -58,10 +57,6 @@ class gr_fft_filter_ccc : public gr_sync_decimator */ gr_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps); - void compute_sizes(int ntaps); - int tailsize() const { return d_ntaps - 1; } - void actual_set_taps (const std::vector<gr_complex> &taps); - public: ~gr_fft_filter_ccc (); diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc index 57232f3fb..e8857fe8c 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2005 Free Software Foundation, Inc. + * Copyright 2005,2010 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -25,13 +25,11 @@ #endif #include <gr_fft_filter_fff.h> +#include <gri_fft_filter_fff_generic.h> +//#include <gri_fft_filter_fff_sse.h> #include <gr_io_signature.h> -#include <gri_fft.h> -#include <math.h> #include <assert.h> #include <stdexcept> -#include <gr_firdes.h> - #include <cstdio> #include <iostream> @@ -48,37 +46,24 @@ gr_fft_filter_fff::gr_fft_filter_fff (int decimation, const std::vector<float> & gr_make_io_signature (1, 1, sizeof (float)), gr_make_io_signature (1, 1, sizeof (float)), decimation), - d_fftsize(-1), d_fwdfft(0), d_invfft(0), d_updated(false) + d_updated(false) { set_history(1); - actual_set_taps(taps); -} - -gr_fft_filter_fff::~gr_fft_filter_fff () -{ - delete d_fwdfft; - delete d_invfft; -} + +#if 1 // don't enable the sse version until handling it is worked out + d_filter = new gri_fft_filter_fff_generic(decimation, taps); +#else + d_filter = new gri_fft_filter_fff_sse(decimation, taps); +#endif -#if 0 -static void -print_vector_complex(const std::string label, const std::vector<gr_complex> &x) -{ - std::cout << label; - for (unsigned i = 0; i < x.size(); i++) - std::cout << x[i] << " "; - std::cout << "\n"; + d_nsamples = 
d_filter->set_taps(taps); + set_output_multiple(d_nsamples); } -static void -print_vector_float(const std::string label, const std::vector<float> &x) +gr_fft_filter_fff::~gr_fft_filter_fff () { - std::cout << label; - for (unsigned i = 0; i < x.size(); i++) - std::cout << x[i] << " "; - std::cout << "\n"; + delete d_filter; } -#endif void gr_fft_filter_fff::set_taps (const std::vector<float> &taps) @@ -87,68 +72,6 @@ gr_fft_filter_fff::set_taps (const std::vector<float> &taps) d_updated = true; } -/* - * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps - */ -void -gr_fft_filter_fff::actual_set_taps (const std::vector<float> &taps) -{ - int i = 0; - compute_sizes(taps.size()); - - d_tail.resize(tailsize()); - for (i = 0; i < tailsize(); i++) - d_tail[i] = 0; - - float *in = d_fwdfft->get_inbuf(); - gr_complex *out = d_fwdfft->get_outbuf(); - - float scale = 1.0 / d_fftsize; - - // Compute forward xform of taps. - // Copy taps into first ntaps slots, then pad with zeros - for (i = 0; i < d_ntaps; i++) - in[i] = taps[i] * scale; - - for (; i < d_fftsize; i++) - in[i] = 0; - - d_fwdfft->execute(); // do the xform - - // now copy output to d_xformed_taps - for (i = 0; i < d_fftsize/2+1; i++) - d_xformed_taps[i] = out[i]; - - //print_vector_complex("transformed taps:", d_xformed_taps); -} - -// determine and set d_ntaps, d_nsamples, d_fftsize - -void -gr_fft_filter_fff::compute_sizes(int ntaps) -{ - int old_fftsize = d_fftsize; - d_ntaps = ntaps; - d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2)))); - d_nsamples = d_fftsize - d_ntaps + 1; - - if (0) - fprintf(stderr, "gr_fft_filter: ntaps = %d, fftsize = %d, nsamples = %d\n", - d_ntaps, d_fftsize, d_nsamples); - - assert(d_fftsize == d_ntaps + d_nsamples -1 ); - - if (d_fftsize != old_fftsize){ // compute new plans - delete d_fwdfft; - delete d_invfft; - d_fwdfft = new gri_fft_real_fwd(d_fftsize); - d_invfft = new gri_fft_real_rev(d_fftsize); - d_xformed_taps.resize(d_fftsize/2+1); - } - - 
set_output_multiple(d_nsamples); -} - int gr_fft_filter_fff::work (int noutput_items, gr_vector_const_void_star &input_items, @@ -158,59 +81,17 @@ gr_fft_filter_fff::work (int noutput_items, float *out = (float *) output_items[0]; if (d_updated){ - actual_set_taps(d_new_taps); + d_nsamples = d_filter->set_taps(d_new_taps); d_updated = false; + set_output_multiple(d_nsamples); return 0; // output multiple may have changed } assert(noutput_items % d_nsamples == 0); + + d_filter->filter(noutput_items, in, out); - int dec_ctr = 0; - int j = 0; - int ninput_items = noutput_items * decimation(); - - for (int i = 0; i < ninput_items; i += d_nsamples){ - - memcpy(d_fwdfft->get_inbuf(), &in[i], d_nsamples * sizeof(float)); - - for (j = d_nsamples; j < d_fftsize; j++) - d_fwdfft->get_inbuf()[j] = 0; - - d_fwdfft->execute(); // compute fwd xform - - gr_complex *a = d_fwdfft->get_outbuf(); - gr_complex *b = &d_xformed_taps[0]; - gr_complex *c = d_invfft->get_inbuf(); - - for (j = 0; j < d_fftsize/2+1; j++) // filter in the freq domain - c[j] = a[j] * b[j]; - - d_invfft->execute(); // compute inv xform - - // add in the overlapping tail - - for (j = 0; j < tailsize(); j++) - d_invfft->get_outbuf()[j] += d_tail[j]; - - // copy nsamples to output - - //memcpy(out, d_invfft->get_outbuf(), d_nsamples * sizeof(float)); - //out += d_nsamples; - - j = dec_ctr; - while (j < d_nsamples) { - *out++ = d_invfft->get_outbuf()[j]; - j += decimation(); - } - dec_ctr = (j - d_nsamples); - - // stash the tail - memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples, - tailsize() * sizeof(float)); - } - - assert((out - (float *) output_items[0]) == noutput_items); - assert(dec_ctr == 0); + //assert((out - (float *) output_items[0]) == noutput_items); return noutput_items; } diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h index b26361107..6eaa21500 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h +++ 
b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h @@ -28,9 +28,8 @@ class gr_fft_filter_fff; typedef boost::shared_ptr<gr_fft_filter_fff> gr_fft_filter_fff_sptr; gr_fft_filter_fff_sptr gr_make_fft_filter_fff (int decimation, const std::vector<float> &taps); -class gr_fir_fff; -class gri_fft_real_fwd; -class gri_fft_real_rev; +class gri_fft_filter_fff_generic; +//class gri_fft_filter_fff_sse; /*! * \brief Fast FFT filter with float input, float output and float taps @@ -41,15 +40,14 @@ class gr_fft_filter_fff : public gr_sync_decimator private: friend gr_fft_filter_fff_sptr gr_make_fft_filter_fff (int decimation, const std::vector<float> &taps); - int d_ntaps; int d_nsamples; - int d_fftsize; // fftsize = ntaps + nsamples - 1 - gri_fft_real_fwd *d_fwdfft; // forward "plan" - gri_fft_real_rev *d_invfft; // inverse "plan" - std::vector<float> d_tail; // state carried between blocks for overlap-add - std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps - std::vector<float> d_new_taps; bool d_updated; +#if 1 // don't enable the sse version until handling it is worked out + gri_fft_filter_fff_generic *d_filter; +#else + gri_fft_filter_fff_sse *d_filter; +#endif + std::vector<float> d_new_taps; /*! * Construct a FFT filter with the given taps @@ -58,10 +56,6 @@ class gr_fft_filter_fff : public gr_sync_decimator * \param taps float filter taps */ gr_fft_filter_fff (int decimation, const std::vector<float> &taps); - - void compute_sizes(int ntaps); - int tailsize() const { return d_ntaps - 1; } - void actual_set_taps (const std::vector<float> &taps); public: ~gr_fft_filter_fff (); diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc new file mode 100644 index 000000000..1e7fbe214 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc @@ -0,0 +1,166 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <gri_fft_filter_ccc_generic.h> +#include <gri_fft.h> +#include <assert.h> +#include <stdexcept> +#include <cstdio> +#include <fftw3.h> + +gri_fft_filter_ccc_generic::gri_fft_filter_ccc_generic (int decimation, + const std::vector<gr_complex> &taps) + : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0) +{ + set_taps(taps); +} + +gri_fft_filter_ccc_generic::~gri_fft_filter_ccc_generic () +{ + delete d_fwdfft; + delete d_invfft; +} + +#if 0 +static void +print_vector_complex(const std::string label, const std::vector<gr_complex> &x) +{ + std::cout << label; + for (unsigned i = 0; i < x.size(); i++) + std::cout << x[i] << " "; + std::cout << "\n"; +} +#endif + + +/* + * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps + */ +int +gri_fft_filter_ccc_generic::set_taps (const std::vector<gr_complex> &taps) +{ + int i = 0; + compute_sizes(taps.size()); + + d_tail.resize(tailsize()); + for (i = 0; i < tailsize(); i++) + d_tail[i] = 0; + + gr_complex *in = d_fwdfft->get_inbuf(); + gr_complex *out = d_fwdfft->get_outbuf(); + + float scale = 1.0 / d_fftsize; + + // Compute forward xform of taps. 
+ // Copy taps into first ntaps slots, then pad with zeros + for (i = 0; i < d_ntaps; i++) + in[i] = taps[i] * scale; + + for (; i < d_fftsize; i++) + in[i] = 0; + + d_fwdfft->execute(); // do the xform + + // now copy output to d_xformed_taps + for (i = 0; i < d_fftsize; i++) + d_xformed_taps[i] = out[i]; + + return d_nsamples; +} + +// determine and set d_ntaps, d_nsamples, d_fftsize + +void +gri_fft_filter_ccc_generic::compute_sizes(int ntaps) +{ + int old_fftsize = d_fftsize; + d_ntaps = ntaps; + d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2)))); + d_nsamples = d_fftsize - d_ntaps + 1; + + if (0) + fprintf(stderr, "gri_fft_filter_ccc_generic: ntaps = %d, fftsize = %d, nsamples = %d\n", + d_ntaps, d_fftsize, d_nsamples); + + assert(d_fftsize == d_ntaps + d_nsamples -1 ); + + if (d_fftsize != old_fftsize){ // compute new plans + delete d_fwdfft; + delete d_invfft; + d_fwdfft = new gri_fft_complex(d_fftsize, true); + d_invfft = new gri_fft_complex(d_fftsize, false); + d_xformed_taps.resize(d_fftsize); + } +} + +int +gri_fft_filter_ccc_generic::filter (int nitems, const gr_complex *input, gr_complex *output) +{ + int dec_ctr = 0; + int j = 0; + int ninput_items = nitems * d_decimation; + + for (int i = 0; i < ninput_items; i += d_nsamples){ + + memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(gr_complex)); + + for (j = d_nsamples; j < d_fftsize; j++) + d_fwdfft->get_inbuf()[j] = 0; + + d_fwdfft->execute(); // compute fwd xform + + gr_complex *a = d_fwdfft->get_outbuf(); + gr_complex *b = &d_xformed_taps[0]; + gr_complex *c = d_invfft->get_inbuf(); + + for (j = 0; j < d_fftsize; j+=1) { // filter in the freq domain + c[j] = a[j] * b[j]; + } + + d_invfft->execute(); // compute inv xform + + // add in the overlapping tail + + for (j = 0; j < tailsize(); j++) + d_invfft->get_outbuf()[j] += d_tail[j]; + + // copy nsamples to output + j = dec_ctr; + while (j < d_nsamples) { + *output++ = d_invfft->get_outbuf()[j]; + j += d_decimation; + } + dec_ctr 
= (j - d_nsamples); + + // stash the tail + memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples, + tailsize() * sizeof(gr_complex)); + } + + assert(dec_ctr == 0); + + return nitems; +} diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h new file mode 100644 index 000000000..3cd9105c7 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h @@ -0,0 +1,82 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GRI_FFT_FILTER_CCC_GENERIC_H +#define INCLUDED_GRI_FFT_FILTER_CCC_GENERIC_H + +#include <gr_complex.h> +#include <vector> + +class gri_fft_complex; + +/*! 
+ * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps + * \ingroup filter_blk + */ +class gri_fft_filter_ccc_generic +{ + private: + int d_ntaps; + int d_nsamples; + int d_fftsize; // fftsize = ntaps + nsamples - 1 + int d_decimation; + gri_fft_complex *d_fwdfft; // forward "plan" + gri_fft_complex *d_invfft; // inverse "plan" + std::vector<gr_complex> d_tail; // state carried between blocks for overlap-add + std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps + std::vector<gr_complex> d_new_taps; + + void compute_sizes(int ntaps); + int tailsize() const { return d_ntaps - 1; } + + public: + /*! + * \brief Construct an FFT filter for complex vectors with the given taps and decimation rate. + * + * This is the basic implementation for performing FFT filter for fast convolution + * in other blocks for complex vectors (such as gr_fft_filter_ccc). + * \param decimation The decimation rate of the filter (int) + * \param taps The filter taps (complex) + */ + gri_fft_filter_ccc_generic (int decimation, const std::vector<gr_complex> &taps); + ~gri_fft_filter_ccc_generic (); + + /*! + * \brief Set new taps for the filter. + * + * Sets new taps and resets the class properties to handle different sizes + * \param taps The filter taps (complex) + */ + int set_taps (const std::vector<gr_complex> &taps); + + /*! 
+ * \brief Perform the filter operation + * + * \param nitems The number of items to produce + * \param input The input vector to be filtered + * \param output The result of the filter operation + */ + int filter (int nitems, const gr_complex *input, gr_complex *output); + +}; + +#endif /* INCLUDED_GRI_FFT_FILTER_CCC_GENERIC_H */ diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc new file mode 100644 index 000000000..b7d925ff3 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc @@ -0,0 +1,186 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <gri_fft_filter_ccc_sse.h> +#include <gri_fft.h> +#include <assert.h> +#include <stdexcept> +#include <cstdio> +#include <xmmintrin.h> +#include <fftw3.h> + +gri_fft_filter_ccc_sse::gri_fft_filter_ccc_sse (int decimation, + const std::vector<gr_complex> &taps) + : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0) +{ + d_xformed_taps = (gr_complex*)fftwf_malloc(1*sizeof(gr_complex)); + set_taps(taps); +} + +gri_fft_filter_ccc_sse::~gri_fft_filter_ccc_sse () +{ + fftwf_free(d_xformed_taps); + delete d_fwdfft; + delete d_invfft; +} + +#if 0 +static void +print_vector_complex(const std::string label, const std::vector<gr_complex> &x) +{ + std::cout << label; + for (unsigned i = 0; i < x.size(); i++) + std::cout << x[i] << " "; + std::cout << "\n"; +} +#endif + + +/* + * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps + */ +int +gri_fft_filter_ccc_sse::set_taps (const std::vector<gr_complex> &taps) +{ + int i = 0; + compute_sizes(taps.size()); + + d_tail.resize(tailsize()); + for (i = 0; i < tailsize(); i++) + d_tail[i] = 0; + + gr_complex *in = d_fwdfft->get_inbuf(); + gr_complex *out = d_fwdfft->get_outbuf(); + + float scale = 1.0 / d_fftsize; + + // Compute forward xform of taps. 
+ // Copy taps into first ntaps slots, then pad with zeros + for (i = 0; i < d_ntaps; i++) + in[i] = taps[i] * scale; + + for (; i < d_fftsize; i++) + in[i] = 0; + + d_fwdfft->execute(); // do the xform + + // now copy output to d_xformed_taps + for (i = 0; i < d_fftsize; i++) + d_xformed_taps[i] = out[i]; + + return d_nsamples; +} + +// determine and set d_ntaps, d_nsamples, d_fftsize + +void +gri_fft_filter_ccc_sse::compute_sizes(int ntaps) +{ + int old_fftsize = d_fftsize; + d_ntaps = ntaps; + d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2)))); + d_nsamples = d_fftsize - d_ntaps + 1; + + if (0) + fprintf(stderr, "gri_fft_filter_ccc_sse: ntaps = %d, fftsize = %d, nsamples = %d\n", + d_ntaps, d_fftsize, d_nsamples); + + assert(d_fftsize == d_ntaps + d_nsamples -1 ); + + if (d_fftsize != old_fftsize){ // compute new plans + delete d_fwdfft; + delete d_invfft; + d_fwdfft = new gri_fft_complex(d_fftsize, true); + d_invfft = new gri_fft_complex(d_fftsize, false); + + fftwf_free(d_xformed_taps); + d_xformed_taps = (gr_complex*)fftwf_malloc((d_fftsize)*sizeof(gr_complex)); + } +} + +int +gri_fft_filter_ccc_sse::filter (int nitems, const gr_complex *input, gr_complex *output) +{ + int dec_ctr = 0; + int j = 0; + int ninput_items = nitems * d_decimation; + + for (int i = 0; i < ninput_items; i += d_nsamples){ + + memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(gr_complex)); + + for (j = d_nsamples; j < d_fftsize; j++) + d_fwdfft->get_inbuf()[j] = 0; + + d_fwdfft->execute(); // compute fwd xform + + float *a = (float*)(d_fwdfft->get_outbuf()); + float *b = (float*)(&d_xformed_taps[0]); + float *c = (float*)(d_invfft->get_inbuf()); + + __m128 x0, x1, x2, t0, t1, m; + m = _mm_set_ps(-1, 1, -1, 1); + for (j = 0; j < 2*d_fftsize; j+=4) { // filter in the freq domain + x0 = _mm_load_ps(&a[j]); + t0 = _mm_load_ps(&b[j]); + + t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1)); + t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0)); + t1 = _mm_mul_ps(t1, 
m); + + x1 = _mm_mul_ps(x0, t0); + x2 = _mm_mul_ps(x0, t1); + + x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1)); + x2 = _mm_add_ps(x1, x2); + + _mm_store_ps(&c[j], x2); + } + + d_invfft->execute(); // compute inv xform + + // add in the overlapping tail + + for (j = 0; j < tailsize(); j++) + d_invfft->get_outbuf()[j] += d_tail[j]; + + // copy nsamples to output + j = dec_ctr; + while (j < d_nsamples) { + *output++ = d_invfft->get_outbuf()[j]; + j += d_decimation; + } + dec_ctr = (j - d_nsamples); + + // stash the tail + memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples, + tailsize() * sizeof(gr_complex)); + } + + assert(dec_ctr == 0); + + return nitems; +} diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h new file mode 100644 index 000000000..d1c54f01f --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h @@ -0,0 +1,82 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GRI_FFT_FILTER_CCC_SSE_H +#define INCLUDED_GRI_FFT_FILTER_CCC_SSE_H + +#include <gr_complex.h> +#include <vector> + +class gri_fft_complex; + +/*! 
+ * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps + * \ingroup filter_blk + */ +class gri_fft_filter_ccc_sse +{ + private: + int d_ntaps; + int d_nsamples; + int d_fftsize; // fftsize = ntaps + nsamples - 1 + int d_decimation; + gri_fft_complex *d_fwdfft; // forward "plan" + gri_fft_complex *d_invfft; // inverse "plan" + std::vector<gr_complex> d_tail; // state carried between blocks for overlap-add + gr_complex *d_xformed_taps; + std::vector<gr_complex> d_new_taps; + + void compute_sizes(int ntaps); + int tailsize() const { return d_ntaps - 1; } + + public: + /*! + * \brief Construct an FFT filter for complex vectors with the given taps and decimation rate. + * + * This is the basic implementation for performing FFT filter for fast convolution + * in other blocks for complex vectors (such as gr_fft_filter_ccc). + * \param decimation The decimation rate of the filter (int) + * \param taps The filter taps (complex) + */ + gri_fft_filter_ccc_sse (int decimation, const std::vector<gr_complex> &taps); + ~gri_fft_filter_ccc_sse (); + + /*! + * \brief Set new taps for the filter. + * + * Sets new taps and resets the class properties to handle different sizes + * \param taps The filter taps (complex) + */ + int set_taps (const std::vector<gr_complex> &taps); + + /*! + * \brief Perform the filter operation + * + * \param nitems The number of items to produce + * \param input The input vector to be filtered + * \param output The result of the filter operation + */ + int filter (int nitems, const gr_complex *input, gr_complex *output); + +}; + +#endif /* INCLUDED_GRI_FFT_FILTER_CCC_SSE_H */ diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc new file mode 100644 index 000000000..5a0916669 --- /dev/null +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc @@ -0,0 +1,157 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. 
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gri_fft_filter_fff_generic.h>
+#include <gri_fft.h>
+#include <assert.h>
+#include <math.h>	// pow, ceil, log (used by compute_sizes)
+#include <string.h>	// memcpy (used by filter)
+#include <stdexcept>
+#include <cstdio>
+
+/*
+ * Build a filter for the given decimation rate and taps.
+ *
+ * d_fftsize starts at -1 and both plans start null, so the set_taps()
+ * call below always allocates the FFT plans.
+ */
+gri_fft_filter_fff_generic::gri_fft_filter_fff_generic (int decimation,
+                                                        const std::vector<float> &taps)
+  : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0)
+{
+  set_taps(taps);
+}
+
+gri_fft_filter_fff_generic::~gri_fft_filter_fff_generic ()
+{
+  delete d_fwdfft;
+  delete d_invfft;
+}
+
+/*
+ * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps
+ *
+ * The overlap-add tail is cleared, so history accumulated with the
+ * previous taps is discarded.  taps must not be empty: compute_sizes()
+ * takes log(ntaps) and tailsize() would become negative.
+ *
+ * Returns d_nsamples, the number of input samples filter() consumes per
+ * FFT block.
+ */
+int
+gri_fft_filter_fff_generic::set_taps (const std::vector<float> &taps)
+{
+  int i = 0;
+  compute_sizes(taps.size());
+
+  // resize the tail and zero every element in one step
+  d_tail.assign(tailsize(), 0.0f);
+
+  float *in = d_fwdfft->get_inbuf();
+  gr_complex *out = d_fwdfft->get_outbuf();
+
+  // Fold the 1/fftsize normalisation into the taps so that filter() does
+  // not have to scale the output of the inverse transform.
+  float scale = 1.0 / d_fftsize;
+
+  // Compute forward xform of taps.
+  // Copy taps into first ntaps slots, then pad with zeros
+  for (i = 0; i < d_ntaps; i++)
+    in[i] = taps[i] * scale;
+
+  for (; i < d_fftsize; i++)
+    in[i] = 0;
+
+  d_fwdfft->execute();		// do the xform
+
+  // now copy output to d_xformed_taps
+  // (a real transform of length fftsize yields fftsize/2+1 complex bins)
+  for (i = 0; i < d_fftsize/2+1; i++)
+    d_xformed_taps[i] = out[i];
+
+  return d_nsamples;
+}
+
+// determine and set d_ntaps, d_nsamples, d_fftsize
+//
+// d_fftsize is twice the smallest power of two >= ntaps, and
+// d_nsamples = d_fftsize - d_ntaps + 1.  The FFT plans and d_xformed_taps
+// are only reallocated when d_fftsize actually changes.
+
+void
+gri_fft_filter_fff_generic::compute_sizes(int ntaps)
+{
+  int old_fftsize = d_fftsize;
+  d_ntaps = ntaps;
+  d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2))));
+  d_nsamples = d_fftsize - d_ntaps + 1;
+
+  if (0)
+    fprintf(stderr, "gri_fft_filter_fff_generic: ntaps = %d, fftsize = %d, nsamples = %d\n",
+	    d_ntaps, d_fftsize, d_nsamples);
+
+  assert(d_fftsize == d_ntaps + d_nsamples -1 );
+
+  if (d_fftsize != old_fftsize){	// compute new plans
+    delete d_fwdfft;
+    delete d_invfft;
+    d_fwdfft = new gri_fft_real_fwd(d_fftsize);
+    d_invfft = new gri_fft_real_rev(d_fftsize);
+    d_xformed_taps.resize(d_fftsize/2+1);
+  }
+}
+
+/*
+ * Overlap-add FFT filtering with decimation.
+ *
+ * Consumes nitems * d_decimation input samples in blocks of d_nsamples and
+ * writes every d_decimation-th filtered sample to output.  Each block is
+ * read in full, so the number of input samples is expected to be a
+ * multiple of d_nsamples (the value returned by set_taps()).
+ *
+ * Returns nitems.
+ */
+int
+gri_fft_filter_fff_generic::filter (int nitems, const float *input, float *output)
+{
+  int dec_ctr = 0;
+  int j = 0;
+  int ninput_items = nitems * d_decimation;
+
+  for (int i = 0; i < ninput_items; i += d_nsamples){
+
+    memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(float));
+
+    // zero-pad the block up to the FFT size
+    for (j = d_nsamples; j < d_fftsize; j++)
+      d_fwdfft->get_inbuf()[j] = 0;
+
+    d_fwdfft->execute();	// compute fwd xform
+
+    gr_complex *a = d_fwdfft->get_outbuf();
+    gr_complex *b = &d_xformed_taps[0];
+    gr_complex *c = d_invfft->get_inbuf();
+
+    for (j = 0; j < d_fftsize/2+1; j++) {	// filter in the freq domain
+      c[j] = a[j] * b[j];
+    }
+
+    d_invfft->execute();	// compute inv xform
+
+    // add in the overlapping tail
+
+    for (j = 0; j < tailsize(); j++)
+      d_invfft->get_outbuf()[j] += d_tail[j];
+
+    // copy nsamples to output, keeping every d_decimation-th sample;
+    // dec_ctr carries the decimation phase over into the next block
+
+    j = dec_ctr;
+    while (j < d_nsamples) {
+      *output++ = d_invfft->get_outbuf()[j];
+      j += d_decimation;
+    }
+    dec_ctr = (j - d_nsamples);
+
+    // stash the tail
+    // (with a single tap d_tail is empty, and &d_tail[0] must not be formed)
+    if (tailsize() > 0)
+      memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
+	     tailsize() * sizeof(float));
+  }
+
+  assert(dec_ctr == 0);
+
+  return nitems;
+}
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h
new file mode 100644
index 000000000..6c31632d5
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h
@@ -0,0 +1,80 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GRI_FFT_FILTER_FFF_GENERIC_H
+#define INCLUDED_GRI_FFT_FILTER_FFF_GENERIC_H
+
+#include <gr_complex.h>
+#include <vector>
+
+class gri_fft_real_fwd;
+class gri_fft_real_rev;
+
+/*!
+ * \brief FFT filter kernel with float input, float output and float taps
+ *
+ * Holds the FFT plans, the transformed taps and the overlap-add tail, so
+ * one instance carries the filter state from one filter() call to the next.
+ */
+class gri_fft_filter_fff_generic
+{
+ private:
+  int d_ntaps;
+  int d_nsamples;
+  int d_fftsize;                          // fftsize = ntaps + nsamples - 1
+  int d_decimation;
+  gri_fft_real_fwd *d_fwdfft;             // forward "plan"
+  gri_fft_real_rev *d_invfft;             // inverse "plan"
+  std::vector<float> d_tail;              // state carried between blocks for overlap-add
+  std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps
+  std::vector<float> d_new_taps;          // not referenced by the implementation in this change
+
+  void compute_sizes(int ntaps);
+  int tailsize() const { return d_ntaps - 1; }
+
+  // Copying is disabled (declared private, intentionally never defined):
+  // the destructor deletes d_fwdfft and d_invfft, so a compiler-generated
+  // copy would delete the same plans twice.
+  gri_fft_filter_fff_generic (const gri_fft_filter_fff_generic &);
+  gri_fft_filter_fff_generic &operator= (const gri_fft_filter_fff_generic &);
+
+ public:
+  /*!
+   * \brief Construct a FFT filter for float vectors with the given taps and decimation rate.
+   *
+   * This is the basic implementation for performing FFT filter for fast convolution
+   * in other blocks for floating point vectors (such as gr_fft_filter_fff).
+   * \param decimation The decimation rate of the filter (int)
+   * \param taps       The filter taps (float); must not be empty
+   */
+  gri_fft_filter_fff_generic (int decimation, const std::vector<float> &taps);
+  ~gri_fft_filter_fff_generic ();
+
+  /*!
+   * \brief Set new taps for the filter.
+   *
+   * Sets new taps and resets the class properties to handle different sizes
+   * \param taps       The filter taps (float); must not be empty
+   * \returns the number of input samples consumed per FFT block
+   */
+  int set_taps (const std::vector<float> &taps);
+
+  /*!
+   * \brief Perform the filter operation
+   *
+   * \param nitems  The number of items to produce
+   * \param input   The input vector to be filtered
+   * \param output  The result of the filter operation
+   * \returns nitems
+   */
+  int filter (int nitems, const float *input, float *output);
+
+};
+
+#endif /* INCLUDED_GRI_FFT_FILTER_FFF_GENERIC_H */
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc
new file mode 100644
index 000000000..2680e6594
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc
@@ -0,0 +1,184 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gri_fft_filter_fff_sse.h>
+#include <gri_fft.h>
+#include <assert.h>
+#include <math.h>	// pow, ceil, log (used by compute_sizes)
+#include <string.h>	// memcpy (used by filter)
+#include <stdexcept>
+#include <cstdio>
+#include <xmmintrin.h>
+#include <fftw3.h>
+
+/*
+ * Build a filter for the given decimation rate and taps.
+ *
+ * d_xformed_taps is given a one-element placeholder so that compute_sizes()
+ * can unconditionally fftwf_free() it before allocating the real buffer;
+ * d_fftsize starts at -1, so the set_taps() call below always reallocates.
+ */
+gri_fft_filter_fff_sse::gri_fft_filter_fff_sse (int decimation,
+                                                const std::vector<float> &taps)
+  : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0)
+{
+  d_xformed_taps = (gr_complex*)fftwf_malloc(1*sizeof(gr_complex));
+  set_taps(taps);
+}
+
+gri_fft_filter_fff_sse::~gri_fft_filter_fff_sse ()
+{
+  fftwf_free(d_xformed_taps);
+  delete d_fwdfft;
+  delete d_invfft;
+}
+
+/*
+ * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps
+ *
+ * The overlap-add tail is cleared, so history accumulated with the
+ * previous taps is discarded.  taps must not be empty: compute_sizes()
+ * takes log(ntaps) and tailsize() would become negative.
+ *
+ * Returns d_nsamples, the number of input samples filter() consumes per
+ * FFT block.
+ */
+int
+gri_fft_filter_fff_sse::set_taps (const std::vector<float> &taps)
+{
+  int i = 0;
+  compute_sizes(taps.size());
+
+  // resize the tail and zero every element in one step
+  d_tail.assign(tailsize(), 0.0f);
+
+  float *in = d_fwdfft->get_inbuf();
+  gr_complex *out = d_fwdfft->get_outbuf();
+
+  // Fold the 1/fftsize normalisation into the taps so that filter() does
+  // not have to scale the output of the inverse transform.
+  float scale = 1.0 / d_fftsize;
+
+  // Compute forward xform of taps.
+  // Copy taps into first ntaps slots, then pad with zeros
+  for (i = 0; i < d_ntaps; i++)
+    in[i] = taps[i] * scale;
+
+  for (; i < d_fftsize; i++)
+    in[i] = 0;
+
+  d_fwdfft->execute();		// do the xform
+
+  // now copy output to d_xformed_taps
+  // (a real transform of length fftsize yields fftsize/2+1 complex bins)
+  for (i = 0; i < d_fftsize/2+1; i++)
+    d_xformed_taps[i] = out[i];
+
+  return d_nsamples;
+}
+
+// determine and set d_ntaps, d_nsamples, d_fftsize
+//
+// d_fftsize is twice the smallest power of two >= ntaps, and
+// d_nsamples = d_fftsize - d_ntaps + 1.  The FFT plans and d_xformed_taps
+// are only reallocated when d_fftsize actually changes.
+
+void
+gri_fft_filter_fff_sse::compute_sizes(int ntaps)
+{
+  int old_fftsize = d_fftsize;
+  d_ntaps = ntaps;
+  d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2))));
+  d_nsamples = d_fftsize - d_ntaps + 1;
+
+  if (0)
+    fprintf(stderr, "gri_fft_filter_fff_sse: ntaps = %d, fftsize = %d, nsamples = %d\n",
+	    d_ntaps, d_fftsize, d_nsamples);
+
+  assert(d_fftsize == d_ntaps + d_nsamples -1 );
+
+  if (d_fftsize != old_fftsize){	// compute new plans
+    delete d_fwdfft;
+    delete d_invfft;
+    d_fwdfft = new gri_fft_real_fwd(d_fftsize);
+    d_invfft = new gri_fft_real_rev(d_fftsize);
+
+    // allocated with fftwf_malloc rather than std::vector because the
+    // buffer is read with _mm_load_ps in filter()
+    fftwf_free(d_xformed_taps);
+    d_xformed_taps = (gr_complex*)fftwf_malloc((d_fftsize/2+1)*sizeof(gr_complex));
+  }
+}
+
+/*
+ * Overlap-add FFT filtering with decimation; the frequency-domain multiply
+ * is done with SSE, two complex bins per iteration.
+ *
+ * Consumes nitems * d_decimation input samples in blocks of d_nsamples and
+ * writes every d_decimation-th filtered sample to output.  Each block is
+ * read in full, so the number of input samples is expected to be a
+ * multiple of d_nsamples (the value returned by set_taps()).
+ *
+ * Returns nitems.
+ */
+int
+gri_fft_filter_fff_sse::filter (int nitems, const float *input, float *output)
+{
+  int dec_ctr = 0;
+  int j = 0;
+  int ninput_items = nitems * d_decimation;
+
+  for (int i = 0; i < ninput_items; i += d_nsamples){
+
+    memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(float));
+
+    // zero-pad the block up to the FFT size
+    for (j = d_nsamples; j < d_fftsize; j++)
+      d_fwdfft->get_inbuf()[j] = 0;
+
+    d_fwdfft->execute();	// compute fwd xform
+
+    // View the complex buffers as interleaved floats (re, im, re, im, ...):
+    // complex bin k lives at float indices 2k and 2k+1.
+    // NOTE(review): _mm_load_ps/_mm_store_ps need 16-byte alignment; this
+    // assumes the gri_fft buffers are aligned like fftwf_malloc memory -- confirm.
+    float *a = (float*)(d_fwdfft->get_outbuf());
+    float *b = (float*)(&d_xformed_taps[0]);
+    float *c = (float*)(d_invfft->get_inbuf());
+
+    __m128 x0, x1, x2, t0, t1, m;
+    m = _mm_set_ps(-1, 1, -1, 1);		// lanes low to high: +1, -1, +1, -1
+    for (j = 0; j < d_fftsize; j+=4) {		// filter in the freq domain
+      x0 = _mm_load_ps(&a[j]);			// ar0 ai0 ar1 ai1
+      t0 = _mm_load_ps(&b[j]);			// br0 bi0 br1 bi1
+
+      t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1));	// bi0 bi0 bi1 bi1
+      t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0));	// br0 br0 br1 br1
+      t1 = _mm_mul_ps(t1, m);					// bi0 -bi0 bi1 -bi1
+
+      x1 = _mm_mul_ps(x0, t0);			// ar*br  ai*br
+      x2 = _mm_mul_ps(x0, t1);			// ar*bi  -ai*bi
+
+      x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1));	// swap re/im lanes
+      x2 = _mm_add_ps(x1, x2);			// ar*br-ai*bi  ai*br+ar*bi
+
+      _mm_store_ps(&c[j], x2);
+    }
+
+    // Finish off the last one; do the complex multiply as floats.
+    // The loop above covers float indices below d_fftsize.  The last
+    // (Nyquist) bin is complex index fftsize/2, which in these float views
+    // is float index d_fftsize -- not d_fftsize/2.
+    j = d_fftsize;
+    c[j]   = (a[j] * b[j])   - (a[j+1] * b[j+1]);
+    c[j+1] = (a[j] * b[j+1]) + (a[j+1] * b[j]);
+
+    d_invfft->execute();	// compute inv xform
+
+    // add in the overlapping tail
+
+    for (j = 0; j < tailsize(); j++)
+      d_invfft->get_outbuf()[j] += d_tail[j];
+
+    // copy nsamples to output, keeping every d_decimation-th sample;
+    // dec_ctr carries the decimation phase over into the next block
+
+    j = dec_ctr;
+    while (j < d_nsamples) {
+      *output++ = d_invfft->get_outbuf()[j];
+      j += d_decimation;
+    }
+    dec_ctr = (j - d_nsamples);
+
+    // stash the tail
+    // (with a single tap d_tail is empty, and &d_tail[0] must not be formed)
+    if (tailsize() > 0)
+      memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
+	     tailsize() * sizeof(float));
+  }
+
+  assert(dec_ctr == 0);
+
+  return nitems;
+}
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h
new file mode 100644
index 000000000..8258bb824
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h
@@ -0,0 +1,81 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.
If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GRI_FFT_FILTER_FFF_SSE_H
+#define INCLUDED_GRI_FFT_FILTER_FFF_SSE_H
+
+#include <gr_complex.h>
+#include <vector>
+
+class gri_fft_real_fwd;
+class gri_fft_real_rev;
+
+/*!
+ * \brief FFT filter kernel with float input, float output and float taps,
+ * using SSE intrinsics for the frequency-domain multiply
+ *
+ * Holds the FFT plans, the transformed taps and the overlap-add tail, so
+ * one instance carries the filter state from one filter() call to the next.
+ */
+class gri_fft_filter_fff_sse
+{
+ private:
+  int d_ntaps;
+  int d_nsamples;
+  int d_fftsize;                  // fftsize = ntaps + nsamples - 1
+  int d_decimation;
+  gri_fft_real_fwd *d_fwdfft;     // forward "plan"
+  gri_fft_real_rev *d_invfft;     // inverse "plan"
+  std::vector<float> d_tail;      // state carried between blocks for overlap-add
+  gr_complex *d_xformed_taps;     // Fourier xformed taps; fftwf_malloc'ed buffer of
+                                  // fftsize/2+1 elements, released with fftwf_free
+  std::vector<float> d_new_taps;  // not referenced by the implementation in this change
+
+  void compute_sizes(int ntaps);
+  int tailsize() const { return d_ntaps - 1; }
+
+  // Copying is disabled (declared private, intentionally never defined):
+  // the destructor frees d_xformed_taps and deletes d_fwdfft and d_invfft,
+  // so a compiler-generated copy would release the same resources twice.
+  gri_fft_filter_fff_sse (const gri_fft_filter_fff_sse &);
+  gri_fft_filter_fff_sse &operator= (const gri_fft_filter_fff_sse &);
+
+ public:
+  /*!
+   * \brief Construct a FFT filter for float vectors with the given taps and decimation rate.
+   *
+   * This is the basic implementation for performing FFT filter for fast convolution
+   * in other blocks for floating point vectors (such as gr_fft_filter_fff).
+   * \param decimation The decimation rate of the filter (int)
+   * \param taps       The filter taps (float); must not be empty
+   */
+  gri_fft_filter_fff_sse (int decimation, const std::vector<float> &taps);
+  ~gri_fft_filter_fff_sse ();
+
+  /*!
+   * \brief Set new taps for the filter.
+   *
+   * Sets new taps and resets the class properties to handle different sizes
+   * \param taps       The filter taps (float); must not be empty
+   * \returns the number of input samples consumed per FFT block
+   */
+  int set_taps (const std::vector<float> &taps);
+
+  /*!
+   * \brief Perform the filter operation
+   *
+   * \param nitems  The number of items to produce
+   * \param input   The input vector to be filtered
+   * \param output  The result of the filter operation
+   * \returns nitems
+   */
+  int filter (int nitems, const float *input, float *output);
+
+};
+
+#endif /* INCLUDED_GRI_FFT_FILTER_FFF_SSE_H */
|