13 files changed, 1073 insertions, 292 deletions
diff --git a/gnuradio-core/src/lib/filter/Makefile.am b/gnuradio-core/src/lib/filter/Makefile.am
index 9cd6e9f38..23c1dadc3 100644
--- a/gnuradio-core/src/lib/filter/Makefile.am
+++ b/gnuradio-core/src/lib/filter/Makefile.am
@@ -184,6 +184,8 @@ libfilter_la_common_SOURCES = 		\
 	$(GENERATED_CC)			\
 	gr_adaptive_fir_ccf.cc		\
 	gr_cma_equalizer_cc.cc		\
+	gri_fft_filter_fff_generic.cc	\
+	gri_fft_filter_ccc_generic.cc	\
 	gr_fft_filter_ccc.cc		\
 	gr_fft_filter_fff.cc		\
 	gr_goertzel_fc.cc		\
@@ -259,6 +261,8 @@ grinclude_HEADERS = 			\
 	gr_altivec.h			\
 	gr_cma_equalizer_cc.h		\
 	gr_cpu.h			\
+	gri_fft_filter_fff_generic.h	\
+	gri_fft_filter_ccc_generic.h	\
 	gr_fft_filter_ccc.h		\
 	gr_fft_filter_fff.h		\
 	gr_filter_delay_fc.h		\
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
index 3dd40d56d..4540c6e4a 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
@@ -30,6 +30,8 @@
 #endif
 
 #include <gr_fft_filter_ccc.h>
+//#include <gri_fft_filter_ccc_sse.h>
+#include <gri_fft_filter_ccc_generic.h>
 #include <gr_io_signature.h>
 #include <gri_fft.h>
 #include <math.h>
@@ -52,32 +54,23 @@ gr_fft_filter_ccc::gr_fft_filter_ccc (int decimation, const std::vector<gr_compl
 		       gr_make_io_signature (1, 1, sizeof (gr_complex)),
 		       gr_make_io_signature (1, 1, sizeof (gr_complex)),
 		       decimation),
-    d_fftsize(-1), d_fwdfft(0), d_invfft(0), d_updated(false)
+    d_updated(false)
 {
-  // if (decimation != 1)
-  //    throw std::invalid_argument("gr_fft_filter_ccc: decimation must be 1");
-
   set_history(1);
-  actual_set_taps(taps);
+#if 1 // don't enable the sse version until handling it is worked out
+  d_filter = new gri_fft_filter_ccc_generic(decimation, taps);
+#else
+  d_filter = new gri_fft_filter_ccc_sse(decimation, taps);
+#endif
+  d_nsamples = d_filter->set_taps(taps);
+  set_output_multiple(d_nsamples);
 }
 
 gr_fft_filter_ccc::~gr_fft_filter_ccc ()
 {
-  delete d_fwdfft;
-  delete d_invfft;
+  delete d_filter;	// frees the gri_ filter kernel created with new in the constructor
 }
 
-#if 0
-static void 
-print_vector_complex(const std::string label, const std::vector<gr_complex> &x)
-{
-  std::cout << label;
-  for (unsigned i = 0; i < x.size(); i++)
-    std::cout << x[i] << " ";
-  std::cout << "\n";
-}
-#endif
-
 void
 gr_fft_filter_ccc::set_taps (const std::vector<gr_complex> &taps)
 {
@@ -85,130 +78,26 @@ gr_fft_filter_ccc::set_taps (const std::vector<gr_complex> &taps)
   d_updated = true;
 }
 
-/*
- * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps
- */
-void
-gr_fft_filter_ccc::actual_set_taps (const std::vector<gr_complex> &taps)
-{
-  int i = 0;
-  compute_sizes(taps.size());
-
-  d_tail.resize(tailsize());
-  for (i = 0; i < tailsize(); i++)
-    d_tail[i] = 0;
-
-  gr_complex *in = d_fwdfft->get_inbuf();
-  gr_complex *out = d_fwdfft->get_outbuf();
-
-  float scale = 1.0 / d_fftsize;
-  
-  // Compute forward xform of taps.
-  // Copy taps into first ntaps slots, then pad with zeros
-  for (i = 0; i < d_ntaps; i++)
-    in[i] = taps[i] * scale;
-
-  for (; i < d_fftsize; i++)
-    in[i] = 0;
-
-  d_fwdfft->execute();		// do the xform
-
-  // now copy output to d_xformed_taps
-  for (i = 0; i < d_fftsize; i++)
-    d_xformed_taps[i] = out[i];
-
-  //print_vector_complex("transformed taps:", d_xformed_taps);
-}
-
-// determine and set d_ntaps, d_nsamples, d_fftsize
-
-void
-gr_fft_filter_ccc::compute_sizes(int ntaps)
-{
-  int old_fftsize = d_fftsize;
-  d_ntaps = ntaps;
-  d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2))));
-  d_nsamples = d_fftsize - d_ntaps + 1;
-
-  if (0)
-    fprintf(stderr, "gr_fft_filter: ntaps = %d, fftsize = %d, nsamples = %d\n",
-	    d_ntaps, d_fftsize, d_nsamples);
-
-  assert(d_fftsize == d_ntaps + d_nsamples -1 );
-
-  if (d_fftsize != old_fftsize){	// compute new plans
-    delete d_fwdfft;
-    delete d_invfft;
-    d_fwdfft = new gri_fft_complex(d_fftsize, true);
-    d_invfft = new gri_fft_complex(d_fftsize, false);
-    d_xformed_taps.resize(d_fftsize);
-  }
-
-  set_output_multiple(d_nsamples);
-}
-
 int
 gr_fft_filter_ccc::work (int noutput_items,
 			 gr_vector_const_void_star &input_items,
 			 gr_vector_void_star &output_items)
 {
-  gr_complex *in = (gr_complex *) input_items[0];
+  const gr_complex *in = (const gr_complex *) input_items[0];	// input is only read, so keep it const
   gr_complex *out = (gr_complex *) output_items[0];
 
   if (d_updated){
-    actual_set_taps(d_new_taps);
+    d_nsamples = d_filter->set_taps(d_new_taps);	// kernel returns its new per-FFT block length
     d_updated = false;
+    set_output_multiple(d_nsamples);	// work() must be called with multiples of the new block length
     return 0;				// output multiple may have changed
   }
 
   assert(noutput_items % d_nsamples == 0);
 
-  int dec_ctr = 0;
-  int j = 0;
-  int ninput_items = noutput_items * decimation();
-
-  for (int i = 0; i < ninput_items; i += d_nsamples){
-    
-    memcpy(d_fwdfft->get_inbuf(), &in[i], d_nsamples * sizeof(gr_complex));
-
-    for (j = d_nsamples; j < d_fftsize; j++)
-      d_fwdfft->get_inbuf()[j] = 0;
-
-    d_fwdfft->execute();	// compute fwd xform
-
-    gr_complex *a = d_fwdfft->get_outbuf();
-    gr_complex *b = &d_xformed_taps[0];
-    gr_complex *c = d_invfft->get_inbuf();
-
-    for (j = 0; j < d_fftsize; j++)	// filter in the freq domain
-      c[j] = a[j] * b[j];
-    
-    d_invfft->execute();	// compute inv xform
-
-    // add in the overlapping tail
-
-    for (j = 0; j < tailsize(); j++)
-      d_invfft->get_outbuf()[j] += d_tail[j];
-
-    // copy nsamples to output
-
-    //memcpy(out, d_invfft->get_outbuf(), d_nsamples * sizeof(gr_complex));
-    //out += d_nsamples;
-
-    j = dec_ctr;
-    while (j < d_nsamples) {
-      *out++ = d_invfft->get_outbuf()[j];
-      j += decimation();
-    }
-    dec_ctr = (j - d_nsamples);
-
-    // stash the tail
-    memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
-	   tailsize() * sizeof(gr_complex));
-  }
+  d_filter->filter(noutput_items, in, out);	// the overlap-add convolution is done by the gri_ kernel
 
-  assert((out - (gr_complex *) output_items[0]) == noutput_items);
-  assert(dec_ctr == 0);
+  //assert((out - (gr_complex *) output_items[0]) == noutput_items);  // disabled: 'out' is passed by value and no longer advanced here
 
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h
index c5363dcbb..68b19e775 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.h
@@ -28,8 +28,8 @@ class gr_fft_filter_ccc;
 typedef boost::shared_ptr<gr_fft_filter_ccc> gr_fft_filter_ccc_sptr;
 gr_fft_filter_ccc_sptr gr_make_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps);
 
-class gr_fir_ccc;
-class gri_fft_complex;
+//class gri_fft_filter_ccc_sse;
+class gri_fft_filter_ccc_generic;
 
 /*!
  * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps
@@ -40,15 +40,14 @@ class gr_fft_filter_ccc : public gr_sync_decimator
  private:
   friend gr_fft_filter_ccc_sptr gr_make_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps);
 
-  int			   d_ntaps;
   int			   d_nsamples;
-  int			   d_fftsize;		// fftsize = ntaps + nsamples - 1
-  gri_fft_complex	  *d_fwdfft;		// forward "plan"
-  gri_fft_complex	  *d_invfft;		// inverse "plan"
-  std::vector<gr_complex>  d_tail;		// state carried between blocks for overlap-add
-  std::vector<gr_complex>  d_xformed_taps;	// Fourier xformed taps
-  std::vector<gr_complex>  d_new_taps;
   bool			   d_updated;
+#if 1  // don't enable the sse version until handling it is worked out
+  gri_fft_filter_ccc_generic  *d_filter;  
+#else
+  gri_fft_filter_ccc_sse  *d_filter;  
+#endif
+  std::vector<gr_complex>  d_new_taps;
 
   /*!
    * Construct a FFT filter with the given taps
@@ -58,10 +57,6 @@ class gr_fft_filter_ccc : public gr_sync_decimator
    */
   gr_fft_filter_ccc (int decimation, const std::vector<gr_complex> &taps);
 
-  void compute_sizes(int ntaps);
-  int tailsize() const { return d_ntaps - 1; }
-  void actual_set_taps (const std::vector<gr_complex> &taps);
-
  public:
   ~gr_fft_filter_ccc ();
 
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
index 57232f3fb..e8857fe8c 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2005 Free Software Foundation, Inc.
+ * Copyright 2005,2010 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -25,13 +25,11 @@
 #endif
 
 #include <gr_fft_filter_fff.h>
+#include <gri_fft_filter_fff_generic.h>
+//#include <gri_fft_filter_fff_sse.h>
 #include <gr_io_signature.h>
-#include <gri_fft.h>
-#include <math.h>
 #include <assert.h>
 #include <stdexcept>
-#include <gr_firdes.h>
-
 
 #include <cstdio>
 #include <iostream>
@@ -48,37 +46,24 @@ gr_fft_filter_fff::gr_fft_filter_fff (int decimation, const std::vector<float> &
 		       gr_make_io_signature (1, 1, sizeof (float)),
 		       gr_make_io_signature (1, 1, sizeof (float)),
 		       decimation),
-    d_fftsize(-1), d_fwdfft(0), d_invfft(0), d_updated(false)
+    d_updated(false)
 {
   set_history(1);
-  actual_set_taps(taps);
-}
-
-gr_fft_filter_fff::~gr_fft_filter_fff ()
-{
-  delete d_fwdfft;
-  delete d_invfft;
-}
+  
+#if 1 // don't enable the sse version until handling it is worked out
+    d_filter = new gri_fft_filter_fff_generic(decimation, taps);
+#else
+    d_filter = new gri_fft_filter_fff_sse(decimation, taps);
+#endif
 
-#if 0
-static void 
-print_vector_complex(const std::string label, const std::vector<gr_complex> &x)
-{
-  std::cout << label;
-  for (unsigned i = 0; i < x.size(); i++)
-    std::cout << x[i] << " ";
-  std::cout << "\n";
+  d_nsamples = d_filter->set_taps(taps);
+  set_output_multiple(d_nsamples);
 }
 
-static void 
-print_vector_float(const std::string label, const std::vector<float> &x)
+gr_fft_filter_fff::~gr_fft_filter_fff ()
 {
-  std::cout << label;
-  for (unsigned i = 0; i < x.size(); i++)
-    std::cout << x[i] << " ";
-  std::cout << "\n";
+  delete d_filter;	// frees the gri_ filter kernel created with new in the constructor
 }
-#endif
 
 void
 gr_fft_filter_fff::set_taps (const std::vector<float> &taps)
@@ -87,68 +72,6 @@ gr_fft_filter_fff::set_taps (const std::vector<float> &taps)
   d_updated = true;
 }
 
-/*
- * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps
- */
-void
-gr_fft_filter_fff::actual_set_taps (const std::vector<float> &taps)
-{
-  int i = 0;
-  compute_sizes(taps.size());
-
-  d_tail.resize(tailsize());
-  for (i = 0; i < tailsize(); i++)
-    d_tail[i] = 0;
-
-  float *in = d_fwdfft->get_inbuf();
-  gr_complex *out = d_fwdfft->get_outbuf();
-
-  float scale = 1.0 / d_fftsize;
-  
-  // Compute forward xform of taps.
-  // Copy taps into first ntaps slots, then pad with zeros
-  for (i = 0; i < d_ntaps; i++)
-    in[i] = taps[i] * scale;
-
-  for (; i < d_fftsize; i++)
-    in[i] = 0;
-
-  d_fwdfft->execute();		// do the xform
-
-  // now copy output to d_xformed_taps
-  for (i = 0; i < d_fftsize/2+1; i++)
-    d_xformed_taps[i] = out[i];
-
-  //print_vector_complex("transformed taps:", d_xformed_taps);
-}
-
-// determine and set d_ntaps, d_nsamples, d_fftsize
-
-void
-gr_fft_filter_fff::compute_sizes(int ntaps)
-{
-  int old_fftsize = d_fftsize;
-  d_ntaps = ntaps;
-  d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2))));
-  d_nsamples = d_fftsize - d_ntaps + 1;
-
-  if (0)
-    fprintf(stderr, "gr_fft_filter: ntaps = %d, fftsize = %d, nsamples = %d\n",
-	    d_ntaps, d_fftsize, d_nsamples);
-
-  assert(d_fftsize == d_ntaps + d_nsamples -1 );
-
-  if (d_fftsize != old_fftsize){	// compute new plans
-    delete d_fwdfft;
-    delete d_invfft;
-    d_fwdfft = new gri_fft_real_fwd(d_fftsize);
-    d_invfft = new gri_fft_real_rev(d_fftsize);
-    d_xformed_taps.resize(d_fftsize/2+1);
-  }
-
-  set_output_multiple(d_nsamples);
-}
-
 int
 gr_fft_filter_fff::work (int noutput_items,
 			 gr_vector_const_void_star &input_items,
@@ -158,59 +81,17 @@ gr_fft_filter_fff::work (int noutput_items,
   float *out = (float *) output_items[0];
 
   if (d_updated){
-    actual_set_taps(d_new_taps);
+    d_nsamples = d_filter->set_taps(d_new_taps);
     d_updated = false;
+    set_output_multiple(d_nsamples);
     return 0;				// output multiple may have changed
   }
 
   assert(noutput_items % d_nsamples == 0);
+  
+  d_filter->filter(noutput_items, in, out);
 
-  int dec_ctr = 0;
-  int j = 0;
-  int ninput_items = noutput_items * decimation();
-
-  for (int i = 0; i < ninput_items; i += d_nsamples){
-    
-    memcpy(d_fwdfft->get_inbuf(), &in[i], d_nsamples * sizeof(float));
-
-    for (j = d_nsamples; j < d_fftsize; j++)
-      d_fwdfft->get_inbuf()[j] = 0;
-
-    d_fwdfft->execute();	// compute fwd xform
-
-    gr_complex *a = d_fwdfft->get_outbuf();
-    gr_complex *b = &d_xformed_taps[0];
-    gr_complex *c = d_invfft->get_inbuf();
-
-    for (j = 0; j < d_fftsize/2+1; j++)	// filter in the freq domain
-      c[j] = a[j] * b[j];
-    
-    d_invfft->execute();	// compute inv xform
-
-    // add in the overlapping tail
-
-    for (j = 0; j < tailsize(); j++)
-      d_invfft->get_outbuf()[j] += d_tail[j];
-
-    // copy nsamples to output
-
-    //memcpy(out, d_invfft->get_outbuf(), d_nsamples * sizeof(float));
-    //out += d_nsamples;
-
-    j = dec_ctr;
-    while (j < d_nsamples) {
-      *out++ = d_invfft->get_outbuf()[j];
-      j += decimation();
-    }
-    dec_ctr = (j - d_nsamples);
-
-    // stash the tail
-    memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
-	   tailsize() * sizeof(float));
-  }
-
-  assert((out - (float *) output_items[0]) == noutput_items);
-  assert(dec_ctr == 0);
+  //assert((out - (float *) output_items[0]) == noutput_items);
 
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h
index b26361107..6eaa21500 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.h
@@ -28,9 +28,8 @@ class gr_fft_filter_fff;
 typedef boost::shared_ptr<gr_fft_filter_fff> gr_fft_filter_fff_sptr;
 gr_fft_filter_fff_sptr gr_make_fft_filter_fff (int decimation, const std::vector<float> &taps);
 
-class gr_fir_fff;
-class gri_fft_real_fwd;
-class gri_fft_real_rev;
+class gri_fft_filter_fff_generic;
+//class gri_fft_filter_fff_sse;
 
 /*!
  * \brief Fast FFT filter with float input, float output and float taps
@@ -41,15 +40,14 @@ class gr_fft_filter_fff : public gr_sync_decimator
  private:
   friend gr_fft_filter_fff_sptr gr_make_fft_filter_fff (int decimation, const std::vector<float> &taps);
 
-  int			   d_ntaps;
   int			   d_nsamples;
-  int			   d_fftsize;		// fftsize = ntaps + nsamples - 1
-  gri_fft_real_fwd	  *d_fwdfft;		// forward "plan"
-  gri_fft_real_rev	  *d_invfft;		// inverse "plan"
-  std::vector<float>       d_tail;		// state carried between blocks for overlap-add
-  std::vector<gr_complex>  d_xformed_taps;	// Fourier xformed taps
-  std::vector<float>	   d_new_taps;
   bool			   d_updated;
+#if 1 // don't enable the sse version until handling it is worked out
+  gri_fft_filter_fff_generic  *d_filter;
+#else
+  gri_fft_filter_fff_sse  *d_filter;
+#endif
+  std::vector<float>	   d_new_taps;
 
   /*!
    * Construct a FFT filter with the given taps
@@ -58,10 +56,6 @@ class gr_fft_filter_fff : public gr_sync_decimator
    * \param taps        float filter taps
    */
   gr_fft_filter_fff (int decimation, const std::vector<float> &taps);
-
-  void compute_sizes(int ntaps);
-  int tailsize() const { return d_ntaps - 1; }
-  void actual_set_taps (const std::vector<float> &taps);
   
  public:
   ~gr_fft_filter_fff ();
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
new file mode 100644
index 000000000..1e7fbe214
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
@@ -0,0 +1,166 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gri_fft_filter_ccc_generic.h>
+#include <gri_fft.h>
+#include <assert.h>
+#include <stdexcept>
+#include <cstdio>
+#include <fftw3.h>
+
+gri_fft_filter_ccc_generic::gri_fft_filter_ccc_generic (int decimation, 
+							const std::vector<gr_complex> &taps)
+  : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0)	// fftsize -1 makes the first compute_sizes() build the plans
+{
+  set_taps(taps);	// sizes the FFTs, clears the tail and stores the transformed taps
+}
+
+gri_fft_filter_ccc_generic::~gri_fft_filter_ccc_generic ()
+{
+  delete d_fwdfft;	// both plans are allocated with new in compute_sizes()
+  delete d_invfft;
+}
+
+#if 0	// debug helper, compiled out; it uses std::cout and std::string, which this file does not include directly
+static void 
+print_vector_complex(const std::string label, const std::vector<gr_complex> &x)
+{
+  std::cout << label;	// prints the label, then every element separated by a space, then a newline
+  for (unsigned i = 0; i < x.size(); i++)
+    std::cout << x[i] << " ";
+  std::cout << "\n";
+}
+#endif
+
+
+/*
+ * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps; clears d_tail and returns d_nsamples
+ */
+int
+gri_fft_filter_ccc_generic::set_taps (const std::vector<gr_complex> &taps)
+{
+  int i = 0;
+  compute_sizes(taps.size());	// updates the sizes and re-creates the FFT plans when fftsize changes
+
+  d_tail.resize(tailsize());	// new taps discard any overlap carried over from the previous taps
+  for (i = 0; i < tailsize(); i++)
+    d_tail[i] = 0;
+
+  gr_complex *in = d_fwdfft->get_inbuf();
+  gr_complex *out = d_fwdfft->get_outbuf();
+
+  float scale = 1.0 / d_fftsize;	// presumably compensates an unnormalised forward/inverse FFT pair -- confirm against gri_fft
+  
+  // Compute forward xform of taps.
+  // Copy taps into first ntaps slots, then pad with zeros
+  for (i = 0; i < d_ntaps; i++)
+    in[i] = taps[i] * scale;
+
+  for (; i < d_fftsize; i++)
+    in[i] = 0;
+
+  d_fwdfft->execute();		// do the xform
+
+  // now copy output to d_xformed_taps
+  for (i = 0; i < d_fftsize; i++)
+    d_xformed_taps[i] = out[i];
+  
+  return d_nsamples;	// number of input items filter() consumes per FFT
+}
+
+// determine and set d_ntaps, d_nsamples, d_fftsize; throws std::invalid_argument if ntaps < 1
+
+void
+gri_fft_filter_ccc_generic::compute_sizes(int ntaps)
+{
+  if (ntaps < 1)	// an empty tap vector would otherwise give fftsize 0 and tailsize() == -1
+    throw std::invalid_argument("gri_fft_filter_ccc_generic: taps must not be empty");
+
+  int old_fftsize = d_fftsize;
+  d_ntaps = ntaps;
+  d_fftsize = 2;		// exact integer sizing: twice the smallest power of two >= ntaps
+  while (d_fftsize / 2 < d_ntaps)
+    d_fftsize *= 2;
+  d_nsamples = d_fftsize - d_ntaps + 1;
+  assert(d_fftsize == d_ntaps + d_nsamples -1 );
+
+  if (d_fftsize != old_fftsize){	// compute new plans
+    delete d_fwdfft;
+    delete d_invfft;
+    d_fwdfft = new gri_fft_complex(d_fftsize, true);
+    d_invfft = new gri_fft_complex(d_fftsize, false);
+    d_xformed_taps.resize(d_fftsize);
+  }
+}
+
+int
+gri_fft_filter_ccc_generic::filter (int nitems, const gr_complex *input, gr_complex *output)
+{
+  // Overlap-add fast convolution: reads nitems * d_decimation inputs in chunks
+  // of d_nsamples and writes every d_decimation-th filtered sample to output.
+  int dec_ctr = 0;		// offset of the first kept sample within the next chunk
+  int j = 0;
+  int ninput_items = nitems * d_decimation;
+
+  for (int i = 0; i < ninput_items; i += d_nsamples){
+    // load one chunk and zero-pad it up to the FFT size
+    memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(gr_complex));
+    for (j = d_nsamples; j < d_fftsize; j++)
+      d_fwdfft->get_inbuf()[j] = 0;
+
+    d_fwdfft->execute();	// compute fwd xform
+
+    gr_complex *a = d_fwdfft->get_outbuf();
+    gr_complex *b = &d_xformed_taps[0];
+    gr_complex *c = d_invfft->get_inbuf();
+
+    for (j = 0; j < d_fftsize; j++)	// filter in the freq domain
+      c[j] = a[j] * b[j];
+
+    d_invfft->execute();	// compute inv xform
+
+    // add in the overlapping tail
+    for (j = 0; j < tailsize(); j++)
+      d_invfft->get_outbuf()[j] += d_tail[j];
+
+    // copy nsamples to output
+    j = dec_ctr;
+    while (j < d_nsamples) {
+      *output++ = d_invfft->get_outbuf()[j];
+      j += d_decimation;
+    }
+    dec_ctr = (j - d_nsamples);
+
+    // stash the tail; a single-tap filter has no tail and &d_tail[0] on an
+    // empty vector is undefined, so skip the copy in that case
+    if (tailsize() > 0)
+      memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
+	     tailsize() * sizeof(gr_complex));
+  }
+
+  assert(dec_ctr == 0);	// the last chunk must end on a decimation boundary
+  return nitems;
+}
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h
new file mode 100644
index 000000000..3cd9105c7
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h
@@ -0,0 +1,82 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GRI_FFT_FILTER_CCC_GENERIC_H
+#define INCLUDED_GRI_FFT_FILTER_CCC_GENERIC_H
+
+#include <gr_complex.h>
+#include <vector>
+
+class gri_fft_complex;
+
+/*!
+ * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps
+ * \ingroup filter_blk
+ */
+class gri_fft_filter_ccc_generic
+{
+ private:
+  int			   d_ntaps;
+  int			   d_nsamples;
+  int			   d_fftsize;		// fftsize = ntaps + nsamples - 1
+  int                      d_decimation;
+  gri_fft_complex	  *d_fwdfft;		// forward "plan"
+  gri_fft_complex	  *d_invfft;		// inverse "plan"
+  std::vector<gr_complex>  d_tail;		// state carried between blocks for overlap-add
+  std::vector<gr_complex>  d_xformed_taps;	// Fourier xformed taps
+  std::vector<gr_complex>  d_new_taps;		// not referenced by the implementation
+
+  void compute_sizes(int ntaps);
+  int tailsize() const { return d_ntaps - 1; }
+  // Copying would make two objects delete the same FFT plans: declared, never defined.
+  gri_fft_filter_ccc_generic (const gri_fft_filter_ccc_generic &);
+  gri_fft_filter_ccc_generic &operator= (const gri_fft_filter_ccc_generic &);
+ public:
+  /*!
+   * \brief Construct an FFT filter for complex vectors with the given taps and decimation rate.
+   *
+   * This is the basic implementation for performing FFT filter for fast convolution
+   * in other blocks for complex vectors (such as gr_fft_filter_ccc).
+   * \param decimation The decimation rate of the filter (int)
+   * \param taps       The filter taps (complex)
+   */
+  gri_fft_filter_ccc_generic (int decimation, const std::vector<gr_complex> &taps);
+  ~gri_fft_filter_ccc_generic ();
+
+  /*!
+   * \brief Set new taps, clear the overlap tail and re-plan the FFTs if the size changed.
+   * \param taps       The filter taps (complex)
+   * \returns the number of input items handled per FFT block (d_nsamples)
+   */
+  int set_taps (const std::vector<gr_complex> &taps);
+
+  /*!
+   * \brief Perform the filter operation
+   * \param nitems  The number of items to produce
+   * \param input   The input vector to be filtered; nitems * decimation items are read
+   * \param output  The result of the filter operation
+   * \returns nitems
+   */
+  int filter (int nitems, const gr_complex *input, gr_complex *output);
+};
+
+#endif /* INCLUDED_GRI_FFT_FILTER_CCC_GENERIC_H */
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc
new file mode 100644
index 000000000..b7d925ff3
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.cc
@@ -0,0 +1,186 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gri_fft_filter_ccc_sse.h>
+#include <gri_fft.h>
+#include <assert.h>
+#include <stdexcept>
+#include <cstdio>
+#include <xmmintrin.h>
+#include <fftw3.h>
+
+gri_fft_filter_ccc_sse::gri_fft_filter_ccc_sse (int decimation,
+						const std::vector<gr_complex> &taps)
+  : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0)	// fftsize -1 makes the first compute_sizes() build the plans
+{
+  d_xformed_taps = (gr_complex*)fftwf_malloc(1*sizeof(gr_complex));	// placeholder so compute_sizes() always has a buffer to fftwf_free
+  set_taps(taps);	// sizes the FFTs, clears the tail and stores the transformed taps
+}
+
+gri_fft_filter_ccc_sse::~gri_fft_filter_ccc_sse ()
+{
+  fftwf_free(d_xformed_taps);	// the buffer comes from fftwf_malloc, so it is released with fftwf_free
+  delete d_fwdfft;	// both plans are allocated with new in compute_sizes()
+  delete d_invfft;
+}
+
+#if 0	// debug helper, compiled out; it uses std::cout and std::string, which this file does not include directly
+static void 
+print_vector_complex(const std::string label, const std::vector<gr_complex> &x)
+{
+  std::cout << label;	// prints the label, then every element separated by a space, then a newline
+  for (unsigned i = 0; i < x.size(); i++)
+    std::cout << x[i] << " ";
+  std::cout << "\n";
+}
+#endif
+
+
+/*
+ * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps; clears d_tail and returns d_nsamples
+ */
+int
+gri_fft_filter_ccc_sse::set_taps (const std::vector<gr_complex> &taps)
+{
+  int i = 0;
+  compute_sizes(taps.size());	// updates the sizes; re-creates the plans and tap buffer when fftsize changes
+
+  d_tail.resize(tailsize());	// new taps discard any overlap carried over from the previous taps
+  for (i = 0; i < tailsize(); i++)
+    d_tail[i] = 0;
+
+  gr_complex *in = d_fwdfft->get_inbuf();
+  gr_complex *out = d_fwdfft->get_outbuf();
+
+  float scale = 1.0 / d_fftsize;	// presumably compensates an unnormalised forward/inverse FFT pair -- confirm against gri_fft
+  
+  // Compute forward xform of taps.
+  // Copy taps into first ntaps slots, then pad with zeros
+  for (i = 0; i < d_ntaps; i++)
+    in[i] = taps[i] * scale;
+
+  for (; i < d_fftsize; i++)
+    in[i] = 0;
+
+  d_fwdfft->execute();		// do the xform
+
+  // now copy output to d_xformed_taps
+  for (i = 0; i < d_fftsize; i++)
+    d_xformed_taps[i] = out[i];
+  
+  return d_nsamples;	// number of input items filter() consumes per FFT
+}
+
+// determine and set d_ntaps, d_nsamples, d_fftsize; throws std::invalid_argument if ntaps < 1
+
+void
+gri_fft_filter_ccc_sse::compute_sizes(int ntaps)
+{
+  if (ntaps < 1)	// an empty tap vector would otherwise give fftsize 0 and tailsize() == -1
+    throw std::invalid_argument("gri_fft_filter_ccc_sse: taps must not be empty");
+
+  int old_fftsize = d_fftsize;
+  d_ntaps = ntaps;
+  d_fftsize = 2;		// exact integer sizing: twice the smallest power of two >= ntaps
+  while (d_fftsize / 2 < d_ntaps)
+    d_fftsize *= 2;
+  d_nsamples = d_fftsize - d_ntaps + 1;
+  assert(d_fftsize == d_ntaps + d_nsamples -1 );
+
+  if (d_fftsize != old_fftsize){	// compute new plans
+    delete d_fwdfft;
+    delete d_invfft;
+    d_fwdfft = new gri_fft_complex(d_fftsize, true);
+    d_invfft = new gri_fft_complex(d_fftsize, false);
+    // the taps live in fftwf_malloc'ed memory because filter() reads them with _mm_load_ps
+    fftwf_free(d_xformed_taps);
+    d_xformed_taps = (gr_complex*)fftwf_malloc((d_fftsize)*sizeof(gr_complex));
+  }
+}
+
+int
+gri_fft_filter_ccc_sse::filter (int nitems, const gr_complex *input, gr_complex *output)
+{
+  // Overlap-add fast convolution: reads nitems * d_decimation inputs in chunks
+  // of d_nsamples and writes every d_decimation-th filtered sample to output.
+  int dec_ctr = 0;		// offset of the first kept sample within the next chunk
+  int j = 0;
+  int ninput_items = nitems * d_decimation;
+
+  for (int i = 0; i < ninput_items; i += d_nsamples){
+    // load one chunk and zero-pad it up to the FFT size
+    memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(gr_complex));
+    for (j = d_nsamples; j < d_fftsize; j++)
+      d_fwdfft->get_inbuf()[j] = 0;
+
+    d_fwdfft->execute();	// compute fwd xform
+
+    // interleaved (re, im) float views; NOTE(review): the aligned SSE loads/stores assume 16-byte aligned gri_fft buffers -- confirm
+    float *a = (float*)(d_fwdfft->get_outbuf());
+    float *b = (float*)(&d_xformed_taps[0]);
+    float *c = (float*)(d_invfft->get_inbuf());
+
+    __m128 x0, x1, x2, t0, t1, m;
+    m = _mm_set_ps(-1, 1, -1, 1);		// lanes low to high: +1, -1, +1, -1
+    for (j = 0; j < 2*d_fftsize; j+=4) {	// filter in the freq domain, two complex products per pass
+      x0 = _mm_load_ps(&a[j]);			// (ar, ai, ar', ai')
+      t0 = _mm_load_ps(&b[j]);			// (br, bi, br', bi')
+      t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1));	// (bi, bi, bi', bi')
+      t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0));	// (br, br, br', br')
+      t1 = _mm_mul_ps(t1, m);					// (bi, -bi, bi', -bi')
+
+      x1 = _mm_mul_ps(x0, t0);		// (ar*br, ai*br, ...)
+      x2 = _mm_mul_ps(x0, t1);		// (ar*bi, -ai*bi, ...)
+
+      x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1));	// swap within pairs: (-ai*bi, ar*bi, ...)
+      x2 = _mm_add_ps(x1, x2);		// (ar*br - ai*bi, ai*br + ar*bi, ...)
+      _mm_store_ps(&c[j], x2);
+    }
+
+    d_invfft->execute();	// compute inv xform
+
+    // add in the overlapping tail
+    for (j = 0; j < tailsize(); j++)
+      d_invfft->get_outbuf()[j] += d_tail[j];
+
+    // copy nsamples to output
+    j = dec_ctr;
+    while (j < d_nsamples) {
+      *output++ = d_invfft->get_outbuf()[j];
+      j += d_decimation;
+    }
+    dec_ctr = (j - d_nsamples);
+
+    // stash the tail; a single-tap filter has no tail and &d_tail[0] on an
+    // empty vector is undefined, so skip the copy in that case
+    if (tailsize() > 0)
+      memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
+	     tailsize() * sizeof(gr_complex));
+  }
+
+  assert(dec_ctr == 0);	// the last chunk must end on a decimation boundary
+  return nitems;
+}
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h
new file mode 100644
index 000000000..d1c54f01f
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_sse.h
@@ -0,0 +1,82 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GRI_FFT_FILTER_CCC_SSE_H
+#define INCLUDED_GRI_FFT_FILTER_CCC_SSE_H
+
+#include <gr_complex.h>
+#include <vector>
+
+class gri_fft_complex;
+
+/*!
+ * \brief Fast FFT filter with gr_complex input, gr_complex output and gr_complex taps
+ * \ingroup filter_blk
+ */
+class gri_fft_filter_ccc_sse
+{
+ private:
+  int			   d_ntaps;
+  int			   d_nsamples;
+  int			   d_fftsize;		// fftsize = ntaps + nsamples - 1
+  int                      d_decimation;
+  gri_fft_complex	  *d_fwdfft;		// forward "plan"
+  gri_fft_complex	  *d_invfft;		// inverse "plan"
+  std::vector<gr_complex>  d_tail;		// state carried between blocks for overlap-add
+  gr_complex              *d_xformed_taps;	// Fourier xformed taps, fftwf_malloc'ed
+  std::vector<gr_complex>  d_new_taps;		// not referenced by the implementation
+
+  void compute_sizes(int ntaps);
+  int tailsize() const { return d_ntaps - 1; }
+  // Copying would free the tap buffer and FFT plans twice: declared, never defined.
+  gri_fft_filter_ccc_sse (const gri_fft_filter_ccc_sse &);
+  gri_fft_filter_ccc_sse &operator= (const gri_fft_filter_ccc_sse &);
+ public:
+  /*!
+   * \brief Construct an FFT filter for complex vectors with the given taps and decimation rate.
+   *
+   * This is the basic implementation for performing FFT filter for fast convolution
+   * in other blocks for complex vectors (such as gr_fft_filter_ccc).
+   * \param decimation The decimation rate of the filter (int)
+   * \param taps       The filter taps (complex)
+   */
+  gri_fft_filter_ccc_sse (int decimation, const std::vector<gr_complex> &taps);
+  ~gri_fft_filter_ccc_sse ();
+
+  /*!
+   * \brief Set new taps, clear the overlap tail and re-plan the FFTs if the size changed.
+   * \param taps       The filter taps (complex)
+   * \returns the number of input items handled per FFT block (d_nsamples)
+   */
+  int set_taps (const std::vector<gr_complex> &taps);
+
+  /*!
+   * \brief Perform the filter operation
+   * \param nitems  The number of items to produce
+   * \param input   The input vector to be filtered; nitems * decimation items are read
+   * \param output  The result of the filter operation
+   * \returns nitems
+   */
+  int filter (int nitems, const gr_complex *input, gr_complex *output);
+};
+
+#endif /* INCLUDED_GRI_FFT_FILTER_CCC_SSE_H */
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
new file mode 100644
index 000000000..5a0916669
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
@@ -0,0 +1,157 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gri_fft_filter_fff_generic.h>
+#include <gri_fft.h>
+#include <assert.h>
+#include <stdexcept>
+#include <cstdio>
+
+// Start with no FFT plans and let set_taps() size everything.  d_fftsize
+// begins at -1 so that the first compute_sizes() call sees a size change
+// and allocates the plans.
+gri_fft_filter_fff_generic::gri_fft_filter_fff_generic
+  (int decimation, const std::vector<float> &taps)
+  : d_fftsize(-1),
+    d_decimation(decimation),
+    d_fwdfft(0),
+    d_invfft(0)
+{
+  this->set_taps(taps);
+}
+
+gri_fft_filter_fff_generic::~gri_fft_filter_fff_generic ()	// releases both FFT plans
+{
+  delete d_fwdfft;	// forward plan, created in compute_sizes()
+  delete d_invfft;	// inverse plan, created in compute_sizes()
+}
+
+/*
+ * Install a new set of taps.
+ *
+ * Determines d_ntaps, d_nsamples and d_fftsize (via compute_sizes), clears
+ * the overlap-add tail and stores the forward transform of the zero-padded
+ * taps in d_xformed_taps.  The taps are scaled by 1/d_fftsize before being
+ * transformed; filter() applies no other scaling to the forward/inverse
+ * transform pair.
+ *
+ * Returns d_nsamples, the number of input samples filter() consumes per
+ * FFT block.
+ *
+ * Throws std::invalid_argument if taps is empty.
+ */
+int
+gri_fft_filter_fff_generic::set_taps (const std::vector<float> &taps)
+{
+  // With no taps tailsize() would be -1 and d_tail would be asked for an
+  // absurd size.  Reject that up front, before any state is modified.
+  if (taps.empty())
+    throw std::invalid_argument("gri_fft_filter_fff_generic: taps must not be empty");
+
+  compute_sizes(taps.size());
+
+  // Start from a clean overlap-add state.
+  d_tail.assign(tailsize(), 0.0f);
+
+  float *in = d_fwdfft->get_inbuf();
+  gr_complex *out = d_fwdfft->get_outbuf();
+
+  float scale = 1.0 / d_fftsize;
+
+  // Compute forward xform of taps.
+  // Copy the scaled taps into the first ntaps slots, then pad with zeros.
+  int i = 0;
+  for (; i < d_ntaps; i++)
+    in[i] = taps[i] * scale;
+  for (; i < d_fftsize; i++)
+    in[i] = 0;
+
+  d_fwdfft->execute();		// do the xform
+
+  // d_xformed_taps holds fftsize/2+1 bins (see compute_sizes); copy them all.
+  for (i = 0; i < d_fftsize/2+1; i++)
+    d_xformed_taps[i] = out[i];
+
+  return d_nsamples;
+}
+
+// Determine and set d_ntaps, d_nsamples and d_fftsize for a filter with
+// ntaps taps, rebuilding the FFT plans when the FFT size changes.
+//
+// d_fftsize is twice the smallest power of two that is >= ntaps, and
+// d_nsamples = d_fftsize - d_ntaps + 1.
+//
+// No member is modified until every allocation has succeeded, so a failure
+// leaves the previous plans and sizes in place.
+//
+// Throws std::invalid_argument if ntaps is so large that d_fftsize would
+// not fit in an int (assuming int has at least 32 bits).
+
+void
+gri_fft_filter_fff_generic::compute_sizes(int ntaps)
+{
+  // Largest power of two whose double still fits in a 32-bit int.
+  static const int max_pow2 = 1 << 29;
+
+  // Integer arithmetic rather than pow(2, ceil(log(n)/log(2))): the result
+  // is exact for every n and well defined for n <= 1.
+  int pow2 = 1;
+  while (pow2 < ntaps) {
+    if (pow2 >= max_pow2)
+      throw std::invalid_argument("gri_fft_filter_fff_generic: too many taps");
+    pow2 *= 2;
+  }
+
+  const int fftsize = 2 * pow2;
+  const int nsamples = fftsize - ntaps + 1;
+
+  if (0)
+    fprintf(stderr, "gri_fft_filter_fff_generic: ntaps = %d, fftsize = %d, nsamples = %d\n",
+	    ntaps, fftsize, nsamples);
+
+  assert(fftsize == ntaps + nsamples - 1);
+
+  if (fftsize != d_fftsize){	// compute new plans
+    // Build the replacements first.  If anything throws, the object still
+    // owns its old plans and the destructor will not delete freed memory.
+    gri_fft_real_fwd *new_fwd = new gri_fft_real_fwd(fftsize);
+    gri_fft_real_rev *new_inv = 0;
+    try {
+      new_inv = new gri_fft_real_rev(fftsize);
+      d_xformed_taps.resize(fftsize/2+1);
+    }
+    catch (...) {
+      delete new_inv;
+      delete new_fwd;
+      throw;
+    }
+
+    delete d_fwdfft;
+    delete d_invfft;
+    d_fwdfft = new_fwd;
+    d_invfft = new_inv;
+  }
+
+  // Commit the sizes only now that nothing can fail any more.
+  d_ntaps = ntaps;
+  d_fftsize = fftsize;
+  d_nsamples = nsamples;
+}
+
+/*
+ * Filter by overlap-add fast convolution, d_nsamples input samples per
+ * FFT block.
+ *
+ * nitems is the number of output items to produce; the loop walks over
+ * nitems * d_decimation input samples.  It always processes whole blocks
+ * of d_nsamples, so that product should be a multiple of d_nsamples,
+ * otherwise the last block reads beyond that range.
+ *
+ * Returns nitems.
+ */
+int
+gri_fft_filter_fff_generic::filter (int nitems, const float *input, float *output)
+{
+  int dec_ctr = 0;
+  int j = 0;
+  int ninput_items = nitems * d_decimation;
+
+  // Fetch the plan buffers once instead of on every access.
+  float      *fwd_in  = d_fwdfft->get_inbuf();
+  gr_complex *a       = d_fwdfft->get_outbuf();
+  gr_complex *b       = &d_xformed_taps[0];
+  gr_complex *c       = d_invfft->get_inbuf();
+  float      *inv_out = d_invfft->get_outbuf();
+
+  for (int i = 0; i < ninput_items; i += d_nsamples){
+
+    // load one block of input and zero-pad it to the FFT size
+    memcpy(fwd_in, &input[i], d_nsamples * sizeof(float));
+    for (j = d_nsamples; j < d_fftsize; j++)
+      fwd_in[j] = 0;
+
+    d_fwdfft->execute();	// compute fwd xform
+
+    for (j = 0; j < d_fftsize/2+1; j++)	// filter in the freq domain
+      c[j] = a[j] * b[j];
+
+    d_invfft->execute();	// compute inv xform
+
+    // add in the overlapping tail
+    for (j = 0; j < tailsize(); j++)
+      inv_out[j] += d_tail[j];
+
+    // Copy every d_decimation'th of the first nsamples results to the
+    // output; dec_ctr carries the decimation phase into the next block.
+    j = dec_ctr;
+    while (j < d_nsamples) {
+      *output++ = inv_out[j];
+      j += d_decimation;
+    }
+    dec_ctr = (j - d_nsamples);
+
+    // Stash the tail.  With a single tap d_tail is empty, and indexing an
+    // empty vector is undefined, so skip the copy in that case.
+    if (tailsize() > 0)
+      memcpy(&d_tail[0], inv_out + d_nsamples, tailsize() * sizeof(float));
+  }
+
+  assert(dec_ctr == 0);
+
+  return nitems;
+}
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h
new file mode 100644
index 000000000..6c31632d5
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h
@@ -0,0 +1,80 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GRI_FFT_FILTER_FFF_GENERIC_H
+#define INCLUDED_GRI_FFT_FILTER_FFF_GENERIC_H
+
+#include <gr_complex.h>
+#include <vector>
+
+class gri_fft_real_fwd;
+class gri_fft_real_rev;
+
+/*!
+ * \brief FFT filter with float input, float output and float taps
+ *
+ * Owns a forward and an inverse real FFT plan together with the
+ * overlap-add state carried between calls to filter().  Instances cannot
+ * be copied.
+ */
+class gri_fft_filter_fff_generic
+{
+ private:
+  int			   d_ntaps;		// number of taps
+  int			   d_nsamples;		// input samples consumed per FFT block
+  int			   d_fftsize;		// fftsize = ntaps + nsamples - 1
+  int                      d_decimation;	// every d_decimation'th result is output
+  gri_fft_real_fwd	  *d_fwdfft;		// forward "plan"
+  gri_fft_real_rev	  *d_invfft;		// inverse "plan"
+  std::vector<float>       d_tail;		// state carried between blocks for overlap-add
+  std::vector<gr_complex>  d_xformed_taps;	// Fourier xformed taps
+  std::vector<float>	   d_new_taps;		// not referenced by the implementation file
+
+  // The destructor deletes d_fwdfft and d_invfft, so a member-wise copy
+  // would delete the same plans twice.  Copying is therefore disabled:
+  // these two are declared but intentionally never defined.
+  gri_fft_filter_fff_generic (const gri_fft_filter_fff_generic &);
+  gri_fft_filter_fff_generic &operator= (const gri_fft_filter_fff_generic &);
+
+  // Sets d_ntaps, d_nsamples and d_fftsize, and rebuilds the plans when
+  // the FFT size changes.
+  void compute_sizes(int ntaps);
+
+  // Number of samples that spill over from one block into the next.
+  int tailsize() const { return d_ntaps - 1; }
+  
+ public:
+  /*!
+   * \brief Construct a FFT filter for float vectors with the given taps and decimation rate.
+   *
+   * This is the basic implementation for performing FFT filter for fast convolution
+   * in other blocks for floating point vectors (such as gr_fft_filter_fff).
+   * \param decimation The decimation rate of the filter (int)
+   * \param taps       The filter taps (float)
+   */
+  gri_fft_filter_fff_generic (int decimation, const std::vector<float> &taps);
+  ~gri_fft_filter_fff_generic ();
+
+  /*!
+   * \brief Set new taps for the filter.
+   *
+   * Sets new taps and resets the class properties to handle different sizes
+   * \param taps       The filter taps (float)
+   * \returns the number of input samples consumed per FFT block (d_nsamples)
+   */
+  int set_taps (const std::vector<float> &taps);
+  
+  /*!
+   * \brief Perform the filter operation
+   *
+   * \param nitems  The number of items to produce
+   * \param input   The input vector to be filtered
+   * \param output  The result of the filter operation
+   * \returns nitems
+   */
+  int filter (int nitems, const float *input, float *output);
+
+};
+
+#endif /* INCLUDED_GRI_FFT_FILTER_FFF_GENERIC_H */
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc
new file mode 100644
index 000000000..2680e6594
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc
@@ -0,0 +1,184 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gri_fft_filter_fff_sse.h>
+#include <gri_fft.h>
+#include <assert.h>
+#include <stdexcept>
+#include <cstdio>
+#include <xmmintrin.h>
+#include <fftw3.h>
+
+// Start with no FFT plans and a one-element placeholder in d_xformed_taps,
+// so that compute_sizes() (reached through set_taps) has a valid buffer to
+// hand to fftwf_free.  d_fftsize begins at -1 so that the first
+// compute_sizes() call sees a size change and allocates the plans.
+gri_fft_filter_fff_sse::gri_fft_filter_fff_sse
+  (int decimation, const std::vector<float> &taps)
+  : d_fftsize(-1),
+    d_decimation(decimation),
+    d_fwdfft(0),
+    d_invfft(0)
+{
+  d_xformed_taps = static_cast<gr_complex *>(fftwf_malloc(sizeof(gr_complex)));
+  this->set_taps(taps);
+}
+
+gri_fft_filter_fff_sse::~gri_fft_filter_fff_sse ()	// releases the taps buffer and both FFT plans
+{
+  fftwf_free(d_xformed_taps);	// obtained from fftwf_malloc, so it must go back through fftwf_free
+  delete d_fwdfft;	// forward plan, created in compute_sizes()
+  delete d_invfft;	// inverse plan, created in compute_sizes()
+}
+
+/*
+ * Install a new set of taps.
+ *
+ * Determines d_ntaps, d_nsamples and d_fftsize (via compute_sizes), clears
+ * the overlap-add tail and stores the forward transform of the zero-padded
+ * taps in d_xformed_taps.  The taps are scaled by 1/d_fftsize before being
+ * transformed; filter() applies no other scaling to the forward/inverse
+ * transform pair.
+ *
+ * Returns d_nsamples, the number of input samples filter() consumes per
+ * FFT block.
+ *
+ * Throws std::invalid_argument if taps is empty.
+ */
+int
+gri_fft_filter_fff_sse::set_taps (const std::vector<float> &taps)
+{
+  // With no taps tailsize() would be -1 and d_tail would be asked for an
+  // absurd size.  Reject that up front, before any state is modified.
+  if (taps.empty())
+    throw std::invalid_argument("gri_fft_filter_fff_sse: taps must not be empty");
+
+  compute_sizes(taps.size());
+
+  // Start from a clean overlap-add state.
+  d_tail.assign(tailsize(), 0.0f);
+
+  float *in = d_fwdfft->get_inbuf();
+  gr_complex *out = d_fwdfft->get_outbuf();
+
+  float scale = 1.0 / d_fftsize;
+
+  // Compute forward xform of taps.
+  // Copy the scaled taps into the first ntaps slots, then pad with zeros.
+  int i = 0;
+  for (; i < d_ntaps; i++)
+    in[i] = taps[i] * scale;
+  for (; i < d_fftsize; i++)
+    in[i] = 0;
+
+  d_fwdfft->execute();		// do the xform
+
+  // d_xformed_taps holds fftsize/2+1 bins (see compute_sizes); copy them all.
+  for (i = 0; i < d_fftsize/2+1; i++)
+    d_xformed_taps[i] = out[i];
+
+  return d_nsamples;
+}
+
+// Determine and set d_ntaps, d_nsamples and d_fftsize for a filter with
+// ntaps taps, rebuilding the FFT plans and the transformed-taps buffer
+// when the FFT size changes.
+//
+// d_fftsize is twice the smallest power of two that is >= ntaps, and
+// d_nsamples = d_fftsize - d_ntaps + 1.
+//
+// No member is modified until every allocation has succeeded, so a failure
+// leaves the previous plans, buffer and sizes in place.
+//
+// Throws std::invalid_argument if ntaps is so large that d_fftsize would
+// not fit in an int (assuming int has at least 32 bits), and
+// std::runtime_error if fftwf_malloc fails.
+
+void
+gri_fft_filter_fff_sse::compute_sizes(int ntaps)
+{
+  // Largest power of two whose double still fits in a 32-bit int.
+  static const int max_pow2 = 1 << 29;
+
+  // Integer arithmetic rather than pow(2, ceil(log(n)/log(2))): the result
+  // is exact for every n and well defined for n <= 1.
+  int pow2 = 1;
+  while (pow2 < ntaps) {
+    if (pow2 >= max_pow2)
+      throw std::invalid_argument("gri_fft_filter_fff_sse: too many taps");
+    pow2 *= 2;
+  }
+
+  const int fftsize = 2 * pow2;
+  const int nsamples = fftsize - ntaps + 1;
+
+  if (0)
+    fprintf(stderr, "gri_fft_filter_fff_sse: ntaps = %d, fftsize = %d, nsamples = %d\n",
+	    ntaps, fftsize, nsamples);
+
+  assert(fftsize == ntaps + nsamples - 1);
+
+  if (fftsize != d_fftsize){	// compute new plans
+    // Build every replacement first.  If anything fails, the object still
+    // owns its old plans and buffer, and the destructor stays safe.
+    gr_complex *new_taps =
+      static_cast<gr_complex *>(fftwf_malloc((fftsize/2+1)*sizeof(gr_complex)));
+    if (new_taps == 0)
+      throw std::runtime_error("gri_fft_filter_fff_sse: fftwf_malloc");
+
+    gri_fft_real_fwd *new_fwd = 0;
+    gri_fft_real_rev *new_inv = 0;
+    try {
+      new_fwd = new gri_fft_real_fwd(fftsize);
+      new_inv = new gri_fft_real_rev(fftsize);
+    }
+    catch (...) {
+      delete new_fwd;		// new_inv can only still be null here
+      fftwf_free(new_taps);
+      throw;
+    }
+
+    delete d_fwdfft;
+    delete d_invfft;
+    fftwf_free(d_xformed_taps);
+    d_fwdfft = new_fwd;
+    d_invfft = new_inv;
+    d_xformed_taps = new_taps;
+  }
+
+  // Commit the sizes only now that nothing can fail any more.
+  d_ntaps = ntaps;
+  d_fftsize = fftsize;
+  d_nsamples = nsamples;
+}
+
+/*
+ * Filter by overlap-add fast convolution, d_nsamples input samples per
+ * FFT block, using SSE for the frequency-domain multiply.
+ *
+ * nitems is the number of output items to produce; the loop walks over
+ * nitems * d_decimation input samples.  It always processes whole blocks
+ * of d_nsamples, so that product should be a multiple of d_nsamples,
+ * otherwise the last block reads beyond that range.
+ *
+ * The three spectrum buffers are accessed with aligned SSE loads/stores
+ * and must therefore be 16-byte aligned.
+ *
+ * Returns nitems.
+ */
+int
+gri_fft_filter_fff_sse::filter (int nitems, const float *input, float *output)
+{
+  int dec_ctr = 0;
+  int j = 0;
+  int ninput_items = nitems * d_decimation;
+
+  // Fetch the plan buffers once instead of on every access.
+  float *fwd_in  = d_fwdfft->get_inbuf();
+  float *inv_out = d_invfft->get_outbuf();
+
+  // Float views of the complex spectra: complex bin k occupies float
+  // offsets 2k (real part) and 2k+1 (imaginary part).
+  float *a = (float*)(d_fwdfft->get_outbuf());
+  float *b = (float*)(&d_xformed_taps[0]);
+  float *c = (float*)(d_invfft->get_inbuf());
+
+  // Sign pattern (+1, -1, +1, -1) from element 0 upwards.
+  const __m128 m = _mm_set_ps(-1, 1, -1, 1);
+
+  for (int i = 0; i < ninput_items; i += d_nsamples){
+
+    // load one block of input and zero-pad it to the FFT size
+    memcpy(fwd_in, &input[i], d_nsamples * sizeof(float));
+    for (j = d_nsamples; j < d_fftsize; j++)
+      fwd_in[j] = 0;
+
+    d_fwdfft->execute();	// compute fwd xform
+
+    // Filter in the freq domain, two complex bins (four floats) per
+    // iteration.  This covers float offsets 0 .. fftsize-1.
+    for (j = 0; j < d_fftsize; j+=4) {
+      __m128 x0 = _mm_load_ps(&a[j]);		// ar0 ai0 ar1 ai1
+      __m128 t0 = _mm_load_ps(&b[j]);		// br0 bi0 br1 bi1
+
+      __m128 t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1));	// bi0 bi0 bi1 bi1
+      t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0));		// br0 br0 br1 br1
+      t1 = _mm_mul_ps(t1, m);						// bi0 -bi0 bi1 -bi1
+
+      __m128 x1 = _mm_mul_ps(x0, t0);		// ar*br   ai*br
+      __m128 x2 = _mm_mul_ps(x0, t1);		// ar*bi  -ai*bi
+
+      x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1));	// swap within each pair
+      x2 = _mm_add_ps(x1, x2);			// ar*br-ai*bi   ai*br+ar*bi
+
+      _mm_store_ps(&c[j], x2);
+    }
+
+    // Finish off the last bin (complex index fftsize/2); do the complex
+    // multiply as floats.  a, b and c are float views, so that bin sits at
+    // float offsets fftsize and fftsize+1, not fftsize/2.
+    j = d_fftsize;
+    c[j] = (a[j] * b[j]) - (a[j+1] * b[j+1]);
+    c[j+1] = (a[j] * b[j+1]) + (a[j+1] * b[j]);
+
+    d_invfft->execute();	// compute inv xform
+
+    // add in the overlapping tail
+    for (j = 0; j < tailsize(); j++)
+      inv_out[j] += d_tail[j];
+
+    // Copy every d_decimation'th of the first nsamples results to the
+    // output; dec_ctr carries the decimation phase into the next block.
+    j = dec_ctr;
+    while (j < d_nsamples) {
+      *output++ = inv_out[j];
+      j += d_decimation;
+    }
+    dec_ctr = (j - d_nsamples);
+
+    // Stash the tail.  With a single tap d_tail is empty, and indexing an
+    // empty vector is undefined, so skip the copy in that case.
+    if (tailsize() > 0)
+      memcpy(&d_tail[0], inv_out + d_nsamples, tailsize() * sizeof(float));
+  }
+
+  assert(dec_ctr == 0);
+
+  return nitems;
+}
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h
new file mode 100644
index 000000000..8258bb824
--- /dev/null
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.h
@@ -0,0 +1,81 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GRI_FFT_FILTER_FFF_SSE_H
+#define INCLUDED_GRI_FFT_FILTER_FFF_SSE_H
+
+#include <gr_complex.h>
+#include <vector>
+
+class gri_fft_real_fwd;
+class gri_fft_real_rev;
+
+/*!
+ * \brief FFT filter with float input, float output and float taps; the
+ * frequency-domain multiply uses SSE intrinsics
+ *
+ * Owns a forward and an inverse real FFT plan, an fftwf_malloc'ed buffer
+ * with the transformed taps, and the overlap-add state carried between
+ * calls to filter().  Instances cannot be copied.
+ */
+class gri_fft_filter_fff_sse
+{
+ private:
+  int			   d_ntaps;		// number of taps
+  int			   d_nsamples;		// input samples consumed per FFT block
+  int			   d_fftsize;		// fftsize = ntaps + nsamples - 1
+  int                      d_decimation;	// every d_decimation'th result is output
+  gri_fft_real_fwd	  *d_fwdfft;		// forward "plan"
+  gri_fft_real_rev	  *d_invfft;		// inverse "plan"
+  std::vector<float>       d_tail;		// state carried between blocks for overlap-add
+  // Fourier xformed taps.  Allocated with fftwf_malloc instead of living in
+  // a std::vector because filter() reads it with aligned SSE loads.
+  gr_complex              *d_xformed_taps;
+  std::vector<float>	   d_new_taps;		// not referenced by the implementation file
+
+  // The destructor frees d_xformed_taps and deletes both plans, so a
+  // member-wise copy would release the same resources twice.  Copying is
+  // therefore disabled: these two are declared but intentionally never
+  // defined.
+  gri_fft_filter_fff_sse (const gri_fft_filter_fff_sse &);
+  gri_fft_filter_fff_sse &operator= (const gri_fft_filter_fff_sse &);
+
+  // Sets d_ntaps, d_nsamples and d_fftsize, and rebuilds the plans and the
+  // taps buffer when the FFT size changes.
+  void compute_sizes(int ntaps);
+
+  // Number of samples that spill over from one block into the next.
+  int tailsize() const { return d_ntaps - 1; }
+  
+ public:
+  /*!
+   * \brief Construct a FFT filter for float vectors with the given taps and decimation rate.
+   *
+   * This is the basic implementation for performing FFT filter for fast convolution
+   * in other blocks for floating point vectors (such as gr_fft_filter_fff).
+   * \param decimation The decimation rate of the filter (int)
+   * \param taps       The filter taps (float)
+   */
+  gri_fft_filter_fff_sse (int decimation, const std::vector<float> &taps);
+  ~gri_fft_filter_fff_sse ();
+
+  /*!
+   * \brief Set new taps for the filter.
+   *
+   * Sets new taps and resets the class properties to handle different sizes
+   * \param taps       The filter taps (float)
+   * \returns the number of input samples consumed per FFT block (d_nsamples)
+   */
+  int set_taps (const std::vector<float> &taps);
+  
+  /*!
+   * \brief Perform the filter operation
+   *
+   * \param nitems  The number of items to produce
+   * \param input   The input vector to be filtered
+   * \param output  The result of the filter operation
+   * \returns nitems
+   */
+  int filter (int nitems, const float *input, float *output);
+
+};
+
+#endif /* INCLUDED_GRI_FFT_FILTER_FFF_SSE_H */