4 files changed, 425 insertions, 164 deletions
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index 1fb3afb4d..ba82c7f48 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -51,13 +51,13 @@ namespace gr {
 			 unsigned long n,
 			 unsigned int decimate);
 
-      private:
-	unsigned int d_ntaps;
-	float  *d_taps;
-	float **d_aligned_taps;
-	float  *d_output;
-	int     d_align;
-	int     d_naligned;
+      protected:
+	std::vector<float> d_taps;
+	unsigned int  d_ntaps;
+	float       **d_aligned_taps;
+	float        *d_output;
+	int           d_align;
+	int           d_naligned;
       };
 
       /**************************************************************/
@@ -82,9 +82,9 @@ namespace gr {
 			unsigned long n,
 			unsigned int decimate);
 
-      private:
+      protected:
+	std::vector<float> d_taps;
 	unsigned int d_ntaps;
-	float       *d_taps;
 	float      **d_aligned_taps;
 	gr_complex  *d_output;
 	int          d_align;
@@ -113,9 +113,9 @@ namespace gr {
 			unsigned long n,
 			unsigned int decimate);
 
-      private:
+      protected:
+	std::vector<gr_complex> d_taps;
 	unsigned int d_ntaps;
-	gr_complex  *d_taps;
 	gr_complex **d_aligned_taps;
 	gr_complex  *d_output;
 	int          d_align;
@@ -144,9 +144,9 @@ namespace gr {
 			unsigned long n,
 			unsigned int decimate);
 
-      private:
+      protected:
+	std::vector<gr_complex> d_taps;
 	unsigned int d_ntaps;
-	gr_complex  *d_taps;
 	gr_complex **d_aligned_taps;
 	gr_complex  *d_output;
 	int          d_align;
@@ -175,9 +175,9 @@ namespace gr {
 			unsigned long n,
 			unsigned int decimate);
 
-      private:
+      protected:
+	std::vector<float> d_taps;
 	unsigned int d_ntaps;
-	float       *d_taps;
 	float      **d_aligned_taps;
 	short       *d_output;
 	int          d_align;
diff --git a/gr-filter/include/filter/fir_filter_with_buffer.h b/gr-filter/include/filter/fir_filter_with_buffer.h
index feebb382f..2ccb74906 100644
--- a/gr-filter/include/filter/fir_filter_with_buffer.h
+++ b/gr-filter/include/filter/fir_filter_with_buffer.h
@@ -38,10 +38,14 @@ namespace gr {
       class FILTER_API fir_filter_with_buffer_fff
       {
       private:
-	float        *d_taps;
+	std::vector<float> d_taps;
+	unsigned int  d_ntaps;
 	float        *d_buffer;
 	unsigned int  d_idx;
-	unsigned int  d_ntaps;
+	float        *d_aligned_taps;
+	float        *d_output;
+	int           d_align;
+	int           d_naligned;
 
       public:
       
@@ -130,10 +134,14 @@ namespace gr {
       class FILTER_API fir_filter_with_buffer_ccc
       {
       private:
-	gr_complex   *d_taps;
+	std::vector<gr_complex> d_taps;
+	unsigned int  d_ntaps;
 	gr_complex   *d_buffer;
 	unsigned int  d_idx;
-	unsigned int  d_ntaps;
+	gr_complex   *d_aligned_taps;
+	gr_complex   *d_output;
+	int           d_align;
+	int           d_naligned;
 
       public:
       
@@ -211,6 +219,103 @@ namespace gr {
 	std::vector<gr_complex> taps() const;
       };
 
+
+      /**************************************************************/
+
+
+      /*!
+       * \brief FIR with internal buffer for gr_complex input, gr_complex output and float taps.
+       * \ingroup filter
+       */
+      class FILTER_API fir_filter_with_buffer_ccf
+      {
+      private:
+	std::vector<float> d_taps;      // taps, kept in reversed order by set_taps()
+	unsigned int  d_ntaps;          // number of taps
+	gr_complex   *d_buffer;         // sample history written by filter()
+	unsigned int  d_idx;            // next write position in d_buffer
+	float        *d_aligned_taps;   // copy of d_taps in fft::malloc_float storage
+	gr_complex   *d_output;         // one-sample result slot from fft::malloc_complex
+	int           d_align;          // value returned by volk_get_alignment()
+	int           d_naligned;       // d_align / sizeof(gr_complex)
+
+      public:
+      
+	// CONSTRUCTORS
+
+	/*!
+	 * \brief construct new FIR with given taps.
+	 *
+	 * Note that taps must be in forward order, e.g., coefficient 0 is
+	 * stored in taps[0], coefficient 1 is stored in
+	 * taps[1], etc.
+	 */
+	fir_filter_with_buffer_ccf(const std::vector<float> &taps);
+
+	~fir_filter_with_buffer_ccf();
+
+	// MANIPULATORS
+
+	/*!
+	 * \brief compute a single output value.
+	 *
+	 * \p input is a single input value of the filter type
+	 *
+	 * \returns the filtered input value.
+	 */
+	gr_complex filter(gr_complex input);
+
+	/*!
+	 * \brief compute a single output value; designed for decimating filters.
+	 *
+	 * \p input is an array of input values of the filter type. The value of dec is the
+	 *    decimating value of the filter, so input[] must have dec valid values.
+	 *    The filter pushes dec number of items onto the circ. buffer before computing
+	 *    a single output.
+	 *
+	 * \returns the filtered input value.
+	 */
+	gr_complex filter(const gr_complex input[], unsigned long dec);
+
+	/*!
+	 * \brief compute an array of N output values.
+	 *
+	 * \p input must have n valid entries.
+	 * input[0] .. input[n - 1] are pushed through the filter one at a time.
+	 */
+	void filterN(gr_complex output[],
+		     const gr_complex input[],
+		     unsigned long n);
+
+	/*!
+	 * \brief compute an array of N output values, decimating the input
+	 *
+	 * \p input must have (decimate * n) valid entries.
+	 * input[0] .. input[decimate * n - 1] are consumed, decimate of them
+	 * for each output value.
+	 */
+	void filterNdec(gr_complex output[], const gr_complex input[],
+			unsigned long n, unsigned long decimate);
+
+	// ACCESSORS
+
+	/*!
+	 * \return number of taps in filter.
+	 */
+	unsigned int ntaps() const { return d_ntaps; }
+
+	/*!
+	 * \brief install \p taps as the current taps.
+	 */
+	void set_taps(const std::vector<float> &taps);
+
+	/*!
+	 * \return current taps
+	 */
+	std::vector<float> taps() const;
+      };
+
+
     } /* namespace kernel */
   } /* namespace filter */
 } /* namespace gr */
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index be8017400..6c82dda5e 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -36,7 +36,7 @@ namespace gr {
 	d_align = volk_get_alignment();
 	d_naligned = d_align / sizeof(float);
 
-	d_taps = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
 
 	// Make sure the output sample is always aligned, too.
@@ -45,17 +45,14 @@ namespace gr {
       
       fir_filter_fff::~fir_filter_fff()
       {
-	// Free taps
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-	}
-
 	// Free all aligned taps
-	for(int i = 0; i < d_naligned; i++) {
-	  fft::free(d_aligned_taps[i]);
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
-	fft::free(d_aligned_taps);
 
 	// Free output sample
 	fft::free(d_output);
@@ -65,37 +62,33 @@ namespace gr {
       fir_filter_fff::set_taps(const std::vector<float> &taps)
       {
 	// Free the taps if already allocated
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-
+	if(d_aligned_taps!= NULL) {
 	  for(int i = 0; i < d_naligned; i++) {
 	    fft::free(d_aligned_taps[i]);
 	  }
 	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
 	
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_float(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
-	}
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
 
 	// Make a set of taps at all possible arch alignments
 	d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
 	for(int i = 0; i < d_naligned; i++) {
 	  d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
 	  memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
-	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(float)*(d_ntaps));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
       }
       
       std::vector<float>
       fir_filter_fff::taps() const
       {
-	std::vector<float> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<float> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -148,7 +141,7 @@ namespace gr {
 	d_align = volk_get_alignment();
 	d_naligned = d_align / sizeof(gr_complex);
 
-	d_taps = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
 
 	// Make sure the output sample is always aligned, too.
@@ -157,42 +150,50 @@ namespace gr {
       
       fir_filter_ccf::~fir_filter_ccf()
       {
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
+	// Free all aligned taps
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
-    }
+
+	// Free output sample
+	fft::free(d_output);
+      }
       
       void
       fir_filter_ccf::set_taps(const std::vector<float> &taps)
       {
 	// Free the taps if already allocated
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
 	
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_float(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
-	}
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
 
 	// Make a set of taps at all possible arch alignments
 	d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
 	for(int i = 0; i < d_naligned; i++) {
 	  d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
 	  memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
-	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(float)*(d_ntaps));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
       }
       
       std::vector<float>
       fir_filter_ccf::taps() const
       {
-	std::vector<float> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<float> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -245,7 +246,7 @@ namespace gr {
 	d_align = volk_get_alignment();
 	d_naligned = d_align / sizeof(gr_complex);
 
-	d_taps = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
 
 	// Make sure the output sample is always aligned, too.
@@ -254,57 +255,50 @@ namespace gr {
       
       fir_filter_ccc::~fir_filter_ccc()
       {
-	// Free taps
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-	}
-
 	// Free all aligned taps
-	for(int i = 0; i < d_naligned; i++) {
-	  fft::free(d_aligned_taps[i]);
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
-	fft::free(d_aligned_taps);
 
 	// Free output sample
 	fft::free(d_output);
-    }
+      }
       
       void
       fir_filter_ccc::set_taps(const std::vector<gr_complex> &taps)
       {
 	// Free the taps if already allocated
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-
+	if(d_aligned_taps != NULL) {
 	  for(int i = 0; i < d_naligned; i++) {
 	    fft::free(d_aligned_taps[i]);
 	  }
 	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
 	
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_complex(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
-	}
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
 
 	// Make a set of taps at all possible arch alignments
 	d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**));
 	for(int i = 0; i < d_naligned; i++) {
 	  d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
 	  memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
-	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(gr_complex)*(d_ntaps));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
       }
       
       std::vector<gr_complex>
       fir_filter_ccc::taps() const
       {
-	std::vector<gr_complex> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<gr_complex> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -357,7 +351,7 @@ namespace gr {
 	d_align = volk_get_alignment();
 	d_naligned = d_align / sizeof(short);
 
-	d_taps = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
 
 	// Make sure the output sample is always aligned, too.
@@ -366,58 +360,50 @@ namespace gr {
       
       fir_filter_scc::~fir_filter_scc()
       {
-	// Free taps
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-	}
-
 	// Free all aligned taps
-	for(int i = 0; i < d_naligned; i++) {
-	  fft::free(d_aligned_taps[i]);
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
-	fft::free(d_aligned_taps);
 
 	// Free output sample
 	fft::free(d_output);
-    }
+      }
       
       void
       fir_filter_scc::set_taps(const std::vector<gr_complex> &taps)
       {
 	// Free the taps if already allocated
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-
+	if(d_aligned_taps != NULL) {
 	  for(int i = 0; i < d_naligned; i++) {
 	    fft::free(d_aligned_taps[i]);
 	  }
 	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
 	
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_complex(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
-	}
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
 
 	// Make a set of taps at all possible arch alignments
 	d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**));
 	for(int i = 0; i < d_naligned; i++) {
 	  d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
 	  memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
-	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(gr_complex)*(d_ntaps));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
-
       }
       
       std::vector<gr_complex>
       fir_filter_scc::taps() const
       {
-	std::vector<gr_complex> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<gr_complex> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -471,7 +457,7 @@ namespace gr {
 	d_align = volk_get_alignment();
 	d_naligned = d_align / sizeof(float);
 
-	d_taps = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
 
 	// Make sure the output sample is always aligned, too.
@@ -480,57 +466,50 @@ namespace gr {
       
       fir_filter_fsf::~fir_filter_fsf()
       {
-	// Free taps
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-	}
-
-	// Free all aligned taps
-	for(int i = 0; i < d_naligned; i++) {
-	  fft::free(d_aligned_taps[i]);
+      	// Free all aligned taps
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
-	fft::free(d_aligned_taps);
 
 	// Free output sample
 	fft::free(d_output);
-    }
+      }
       
       void
       fir_filter_fsf::set_taps(const std::vector<float> &taps)
       {
 	// Free the taps if already allocated
-	if(d_taps != NULL) {
-	  fft::free(d_taps);
-	  d_taps = NULL;
-
+	if(d_aligned_taps != NULL) {
 	  for(int i = 0; i < d_naligned; i++) {
 	    fft::free(d_aligned_taps[i]);
 	  }
 	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
 	}
 	
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_float(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
-	}
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
 
 	// Make a set of taps at all possible arch alignments
 	d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
 	for(int i = 0; i < d_naligned; i++) {
 	  d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
 	  memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
-	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(float)*(d_ntaps));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
       }
       
       std::vector<float>
       fir_filter_fsf::taps() const
       {
-	std::vector<float> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<float> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -550,12 +529,6 @@ namespace gr {
 				   d_aligned_taps[al],
 				   (d_ntaps+al));
 
-	//float out = 0;
-	//for(unsigned int i = 0; i < d_ntaps; i++) {
-	//  out += d_taps[i] * input[i];
-	//}
-	//*d_output = (short)out;
-
 	return *d_output;
       }
       
diff --git a/gr-filter/lib/fir_filter_with_buffer.cc b/gr-filter/lib/fir_filter_with_buffer.cc
index a2b804a08..b1cc589a5 100644
--- a/gr-filter/lib/fir_filter_with_buffer.cc
+++ b/gr-filter/lib/fir_filter_with_buffer.cc
@@ -27,6 +27,7 @@
 #include <filter/fir_filter_with_buffer.h>
 #include <fft/fft.h>
 #include <volk/volk.h>
+#include <algorithm>
 
 namespace gr {
   namespace filter {
@@ -34,14 +35,30 @@ namespace gr {
 
       fir_filter_with_buffer_fff::fir_filter_with_buffer_fff(const std::vector<float> &taps)
       {
+	d_align = volk_get_alignment();
+	d_naligned = d_align / sizeof(float);
+
 	d_buffer = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
+
+	// Make sure the output sample is always aligned, too.
+	d_output = fft::malloc_float(1);
       }
 
       fir_filter_with_buffer_fff::~fir_filter_with_buffer_fff()
       {
-	if(d_buffer != NULL)
+	if(d_buffer != NULL) {
 	  fft::free(d_buffer);
+	  d_buffer = NULL;
+	}
+	
+	// Free aligned taps
+	fft::free(d_aligned_taps);
+	d_aligned_taps = NULL;
+
+	// Free output sample
+	fft::free(d_output);
       }
 
       void
@@ -52,20 +69,32 @@ namespace gr {
 	  d_buffer = NULL;
 	}
 
+	// Free the taps if already allocated
+	if(d_aligned_taps != NULL) {
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
+	}
+
+	d_buffer = fft::malloc_float(d_ntaps);
+
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_float(d_ntaps);
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
+
+	// Allocate aligned taps
+	d_aligned_taps = fft::malloc_float(d_ntaps);
 	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
+	  d_aligned_taps[i] = d_taps[i];
 	}
+
 	d_idx = 0;
       }
 
       std::vector<float>
       fir_filter_with_buffer_fff::taps() const
       {
-	std::vector<float> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<float> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -79,11 +108,10 @@ namespace gr {
 	if(d_idx >= ntaps())
 	  d_idx = 0;
 
-	float output = 0;
-	volk_32f_x2_dot_prod_32f_u(&output, &d_buffer[d_idx],
-				   d_taps, d_ntaps);
-
-	return output;
+	volk_32f_x2_dot_prod_32f_a(d_output, d_buffer,
+				   d_aligned_taps,
+				   ntaps());
+	return *d_output;
       }
 
       float
@@ -100,10 +128,10 @@ namespace gr {
 	    d_idx = 0;
 	}
 
-	float output = 0;
-	volk_32f_x2_dot_prod_32f_u(&output, &d_buffer[d_idx],
-				   d_taps, d_ntaps);
-	return output;
+	volk_32f_x2_dot_prod_32f_a(d_output, d_buffer,
+				   d_aligned_taps,
+				   ntaps());
+	return *d_output;
       }
 
       void
@@ -135,14 +163,30 @@ namespace gr {
 
       fir_filter_with_buffer_ccc::fir_filter_with_buffer_ccc(const std::vector<gr_complex> &taps)
       {
+	d_align = volk_get_alignment();
+	d_naligned = d_align / sizeof(gr_complex);
+
 	d_buffer = NULL;
+	d_aligned_taps = NULL;
 	set_taps(taps);
+
+	// Make sure the output sample is always aligned, too.
+	d_output = fft::malloc_complex(1);
       }
 
       fir_filter_with_buffer_ccc::~fir_filter_with_buffer_ccc()
       {
-	if(d_buffer != NULL)
+	if(d_buffer != NULL) {
 	  fft::free(d_buffer);
+	  d_buffer = NULL;
+	}
+	
+	// Free aligned taps
+	fft::free(d_aligned_taps);
+	d_aligned_taps = NULL;
+
+	// Free output sample
+	fft::free(d_output);
       }
 
       void
@@ -153,20 +197,32 @@ namespace gr {
 	  d_buffer = NULL;
 	}
 
+	// Free the taps if already allocated
+	if(d_aligned_taps != NULL) {
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
+	}
+
+	d_buffer = fft::malloc_complex(d_ntaps);
+
 	d_ntaps = (int)taps.size();
-	d_taps = fft::malloc_complex(d_ntaps);
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
+
+	// Allocate aligned taps
+	d_aligned_taps = fft::malloc_complex(d_ntaps);
 	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_taps[d_ntaps-i-1] = taps[i];
+	  d_aligned_taps[i] = d_taps[i];
 	}
+
 	d_idx = 0;
       }
 
       std::vector<gr_complex>
       fir_filter_with_buffer_ccc::taps() const
       {
-	std::vector<gr_complex> t;
-	for(unsigned int i = 0; i < d_ntaps; i++)
-	  t.push_back(d_taps[d_ntaps-i-1]);
+	std::vector<gr_complex> t = d_taps;
+	std::reverse(t.begin(), t.end());
 	return t;
       }
 
@@ -180,11 +236,10 @@ namespace gr {
 	if(d_idx >= ntaps())
 	  d_idx = 0;
 
-	gr_complex output = 0;
-	volk_32fc_x2_dot_prod_32fc_u(&output, &d_buffer[d_idx],
-				     d_taps, d_ntaps);
-
-	return output;
+	volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer,
+				     d_aligned_taps,
+				     ntaps());
+	return *d_output;
       }
 
       gr_complex
@@ -201,10 +256,10 @@ namespace gr {
 	    d_idx = 0;
 	}
 
-	gr_complex output = 0;
-	volk_32fc_x2_dot_prod_32fc_u(&output, &d_buffer[d_idx],
-				     d_taps, d_ntaps);
-	return output;
+	volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer,
+				     d_aligned_taps,
+				     ntaps());
+	return *d_output;
       }
 
       void
@@ -230,6 +285,134 @@ namespace gr {
 	}
       }
 
+      
+      /**************************************************************/
+
+
+      // Construct the filter and install the initial taps.
+      fir_filter_with_buffer_ccf::fir_filter_with_buffer_ccf(const std::vector<float> &taps)
+      {
+	// No storage yet, so set_taps() has nothing to free.
+	d_aligned_taps = NULL;
+	d_buffer = NULL;
+
+	d_align = volk_get_alignment();
+	d_naligned = d_align / sizeof(gr_complex);
+	set_taps(taps);
+	d_output = fft::malloc_complex(1);  // keep the output sample aligned, too
+      }
+
+      // Release everything the filter allocated through fft::malloc_*.
+      fir_filter_with_buffer_ccf::~fir_filter_with_buffer_ccf()
+      {
+	// Sample history (may be absent)
+	if(d_buffer) {
+	  fft::free(d_buffer);
+	  d_buffer = NULL;
+	}
+
+	// Aligned taps, then the output sample
+	fft::free(d_aligned_taps);
+	d_aligned_taps = NULL;
+	fft::free(d_output);
+      }
+
+      // Install new taps (stored reversed) and reset the sample history.
+      void
+      fir_filter_with_buffer_ccf::set_taps(const std::vector<float> &taps)
+      {
+	// Release storage left over from a previous call, if any.
+	if(d_buffer != NULL) {
+	  fft::free(d_buffer);
+	  d_buffer = NULL;
+	}
+	if(d_aligned_taps != NULL) {
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
+	}
+
+	// d_ntaps must be current before any buffer is sized from it.
+	d_ntaps = (int)taps.size();
+	d_taps = taps;
+	std::reverse(d_taps.begin(), d_taps.end());
+
+	// filter() also writes d_buffer[d_idx+ntaps()]: hold 2*d_ntaps zeroed samples.
+	d_buffer = fft::malloc_complex(2*d_ntaps);
+	std::fill(d_buffer, d_buffer + 2*d_ntaps, gr_complex(0, 0));
+
+	// Copy of the reversed taps in fft::malloc_float storage.
+	d_aligned_taps = fft::malloc_float(d_ntaps);
+	std::copy(d_taps.begin(), d_taps.end(), d_aligned_taps);
+	d_idx = 0;
+      }
+
+      // Return the taps in the caller's (forward) order.
+      std::vector<float>
+      fir_filter_with_buffer_ccf::taps() const
+      {
+	// d_taps is kept reversed, so walk it back-to-front.
+	return std::vector<float>(d_taps.rbegin(), d_taps.rend());
+      }
+
+      // Store one sample (mirrored at d_idx+ntaps()) and return the FIR output.
+      gr_complex
+      fir_filter_with_buffer_ccf::filter(gr_complex input)
+      {
+	d_buffer[d_idx] = input;
+	d_buffer[d_idx+ntaps()] = input;
+	if(++d_idx >= ntaps())
+	  d_idx = 0;
+
+	// Window of the last ntaps() samples starts at d_idx (generally unaligned, so no _a kernel).
+	*d_output = gr_complex(0, 0);
+	for(unsigned int k = 0; k < ntaps(); k++)
+	  *d_output += d_buffer[d_idx+k] * d_aligned_taps[k];
+	return *d_output;
+      }
+
+      // Store dec samples (each mirrored at d_idx+ntaps()), then return one FIR output.
+      gr_complex
+      fir_filter_with_buffer_ccf::filter(const gr_complex input[],
+					 unsigned long dec)
+      {
+	for(unsigned long i = 0; i < dec; i++) {
+	  // Writing both halves keeps [d_idx, d_idx+ntaps()) contiguous.
+	  d_buffer[d_idx] = input[i];
+	  d_buffer[d_idx+ntaps()] = input[i];
+	  if(++d_idx >= ntaps())
+	    d_idx = 0;
+	}
+
+	// Window of the last ntaps() samples starts at d_idx (generally unaligned, so no _a kernel).
+	*d_output = gr_complex(0, 0);
+	for(unsigned int k = 0; k < ntaps(); k++)
+	  *d_output += d_buffer[d_idx+k] * d_aligned_taps[k];
+	return *d_output;
+      }
+
+      // Filter n samples one at a time: output[k] = filter(input[k]).
+      void
+      fir_filter_with_buffer_ccf::filterN(gr_complex output[],
+					  const gr_complex input[],
+					  unsigned long n)
+      {
+	for(unsigned long k = n; k > 0; k--)
+	  *output++ = filter(*input++);
+      }
+
+      // Produce n outputs; each one consumes the next `decimate` input samples.
+      void
+      fir_filter_with_buffer_ccf::filterNdec(gr_complex output[],
+					     const gr_complex input[],
+					     unsigned long n,
+					     unsigned long decimate)
+      {
+	const gr_complex *chunk = input;
+	for(unsigned long k = 0; k < n; k++, chunk += decimate) {
+	  output[k] = filter(chunk, decimate);
+	}
+      }
+
 
     } /* namespace kernel */
   } /* namespace filter */