6 files changed, 397 insertions, 2 deletions
diff --git a/gr-filter/include/filter/CMakeLists.txt b/gr-filter/include/filter/CMakeLists.txt
index 5b209873c..2620d3f54 100644
--- a/gr-filter/include/filter/CMakeLists.txt
+++ b/gr-filter/include/filter/CMakeLists.txt
@@ -64,7 +64,7 @@ endmacro(expand_h)
 ########################################################################
 # Invoke macro to generate various sources
 #######################################################################
-expand_h(fir_filter_XXX fff ccf ccc)
+expand_h(fir_filter_XXX fff ccf ccc scc fsf)
 
 add_custom_target(filter_generated_includes DEPENDS
     ${generated_includes}
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index 8bfaa4f50..1fb3afb4d 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -122,6 +122,68 @@ namespace gr {
 	int          d_naligned;
       };
 
+      /**************************************************************/
+      
+      class FILTER_API fir_filter_scc  // FIR kernel: short input, complex taps, complex output
+      {
+      public:
+	fir_filter_scc(int decimation,  // accepted but not read by the constructor body
+		       const std::vector<gr_complex> &taps);
+	~fir_filter_scc();  // releases the tap buffers and the output sample
+
+	void set_taps(const std::vector<gr_complex> &taps);  // copies taps; frees any previous set
+	std::vector<gr_complex> taps() const;  // taps in the order passed to set_taps()
+	unsigned int ntaps() const;  // number of taps currently loaded
+
+	gr_complex filter(const short input[]);  // one output from the window starting at input[0]
+	void filterN(gr_complex output[],  // n outputs; window advances 1 input per output
+		     const short input[],
+		     unsigned long n);
+	void filterNdec(gr_complex output[],  // n outputs; window advances `decimate` inputs per output
+			const short input[],
+			unsigned long n,
+			unsigned int decimate);
+
+      private:  // NOTE(review): owns raw buffers but is implicitly copyable; a copy would free them twice
+	unsigned int d_ntaps;  // tap count
+	gr_complex  *d_taps;  // taps stored in reversed order
+	gr_complex **d_aligned_taps;  // d_naligned zero-padded copies of d_taps, one per offset
+	gr_complex  *d_output;  // single output sample written by the volk kernel
+	int          d_align;  // value of volk_get_alignment()
+	int          d_naligned;  // d_align / sizeof(short)
+      };
+
+      /**************************************************************/
+      
+      class FILTER_API fir_filter_fsf  // FIR kernel: float input, float taps, short output
+      {
+      public:
+	fir_filter_fsf(int decimation,  // accepted but not read by the constructor body
+		       const std::vector<float> &taps);
+	~fir_filter_fsf();  // releases the tap buffers and the output sample
+
+	void set_taps(const std::vector<float> &taps);  // copies taps; frees any previous set
+	std::vector<float> taps() const;  // taps in the order passed to set_taps()
+	unsigned int ntaps() const;  // number of taps currently loaded
+
+	short filter(const float input[]);  // one output from the window starting at input[0]
+	void filterN(short output[],  // n outputs; window advances 1 input per output
+		     const float input[],
+		     unsigned long n);
+	void filterNdec(short output[],  // n outputs; window advances `decimate` inputs per output
+			const float input[],
+			unsigned long n,
+			unsigned int decimate);
+
+      private:  // NOTE(review): owns raw buffers but is implicitly copyable; a copy would free them twice
+	unsigned int d_ntaps;  // tap count
+	float       *d_taps;  // taps stored in reversed order
+	float      **d_aligned_taps;  // d_naligned zero-padded copies of d_taps, one per offset
+	short       *d_output;  // single output sample written by the volk kernel
+	int          d_align;  // value of volk_get_alignment()
+	int          d_naligned;  // d_align / sizeof(float)
+      };
+
     } /* namespace kernel */
   } /* namespace filter */
 } /* namespace gr */
diff --git a/gr-filter/lib/CMakeLists.txt b/gr-filter/lib/CMakeLists.txt
index b51a23bab..f5dbd1bb3 100644
--- a/gr-filter/lib/CMakeLists.txt
+++ b/gr-filter/lib/CMakeLists.txt
@@ -80,7 +80,7 @@ endmacro(expand_cc)
 ########################################################################
 # Invoke macro to generate various sources
 ########################################################################
-expand_cc(fir_filter_XXX_impl         fff ccf ccc)
+expand_cc(fir_filter_XXX_impl         fff ccf ccc scc fsf)
 
 
 ########################################################################
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 18568da9d..be8017400 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -349,6 +349,238 @@ namespace gr {
 	}
       }
       
+      /**************************************************************/
+
+      fir_filter_scc::fir_filter_scc(int decimation,  // not used in this body
+				     const std::vector<gr_complex> &taps)
+      {
+	d_align = volk_get_alignment();
+	d_naligned = d_align / sizeof(short);  // alignment expressed in input (short) samples
+
+	d_taps = NULL;  // lets set_taps() skip the free path on this first call
+	set_taps(taps);
+
+	// Make sure the output sample is always aligned, too.
+	d_output = fft::malloc_complex(1);
+      }
+      
+      fir_filter_scc::~fir_filter_scc()
+      {
+	// Free the reversed tap array (allocated with fft::malloc_complex)
+	if(d_taps != NULL) {
+	  fft::free(d_taps);
+	  d_taps = NULL;
+	}
+
+	// Free every per-alignment tap copy (each from fft::malloc_complex)
+	for(int i = 0; i < d_naligned; i++) {
+	  fft::free(d_aligned_taps[i]);
+	}
+	::free(d_aligned_taps);  // pointer table came from malloc(), not fft::malloc_*
+
+	// Free output sample
+	fft::free(d_output);
+      }
+      
+      void
+      fir_filter_scc::set_taps(const std::vector<gr_complex> &taps)
+      {
+	// Release the previous tap set, if any, before building the new one
+	if(d_taps != NULL) {
+	  fft::free(d_taps);
+	  d_taps = NULL;
+
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  ::free(d_aligned_taps);  // allocated with malloc() below, so not fft::free
+	}
+
+	d_ntaps = (unsigned int)taps.size();
+	d_taps = fft::malloc_complex(d_ntaps);
+	for(unsigned int i = 0; i < d_ntaps; i++) {
+	  d_taps[d_ntaps-i-1] = taps[i];  // store reversed for the dot product
+	}
+
+	// Copy i holds i leading zeros then the taps, so filter() can start the
+	// dot product at an aligned address i samples before the input pointer.
+	d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex*));
+	for(int i = 0; i < d_naligned; i++) {
+	  d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
+	  memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
+	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(gr_complex)*(d_ntaps));
+	}
+      }
+      
+      std::vector<gr_complex>
+      fir_filter_scc::taps() const
+      {
+	// d_taps is stored reversed; copy it back out in the caller's order.
+	std::vector<gr_complex> t(d_ntaps);
+	for(unsigned int k = 0; k < d_ntaps; k++) t[k] = d_taps[d_ntaps-k-1];
+	return t;
+      }
+
+      unsigned int
+      fir_filter_scc::ntaps() const
+      {
+	return d_ntaps;  // tap count recorded by the last set_taps() call
+      }
+      
+      gr_complex
+      fir_filter_scc::filter(const short input[])
+      {
+	// Round the address down to the alignment boundary; size_t (not unsigned
+	// long) so the pointer is not truncated where long is narrower than a pointer.
+	const short *ar = (short *)((size_t) input & ~(size_t)(d_align-1));
+	unsigned al = input - ar;  // samples skipped back; selects the padded tap copy
+	volk_16i_32fc_dot_prod_32fc_a(d_output, ar,
+				      d_aligned_taps[al],
+				      (d_ntaps+al));
+	return *d_output;
+      }
+      
+      void
+      fir_filter_scc::filterN(gr_complex output[],
+			      const short input[],
+			      unsigned long n)
+      {
+	for(unsigned long i = 0; i < n; i++)  // one output per input position
+	  output[i] = filter(&input[i]);      // window starts at input[i]
+      }
+      
+      
+      void
+      fir_filter_scc::filterNdec(gr_complex output[],
+				 const short input[],
+				 unsigned long n,
+				 unsigned int decimate)
+      {
+	// Output k is filtered from the window that begins at input[k*decimate].
+	for(unsigned long k = 0; k < n; k++) {
+	  const unsigned long start = k * decimate;
+	  output[k] = filter(&input[start]);
+	}
+      }
+
+      /**************************************************************/
+
+      fir_filter_fsf::fir_filter_fsf(int decimation,  // not used in this body
+				     const std::vector<float> &taps)
+      {
+	d_align = volk_get_alignment();
+	d_naligned = d_align / sizeof(float);  // alignment expressed in input (float) samples
+
+	d_taps = NULL;  // lets set_taps() skip the free path on this first call
+	set_taps(taps);
+
+	// Make sure the output sample is always aligned, too.
+	d_output = (short*)fft::malloc_float(1);  // float-sized allocation used as the short output slot
+      }
+      
+      fir_filter_fsf::~fir_filter_fsf()
+      {
+	// Free the reversed tap array (allocated with fft::malloc_float)
+	if(d_taps != NULL) {
+	  fft::free(d_taps);
+	  d_taps = NULL;
+	}
+
+	// Free every per-alignment tap copy (each from fft::malloc_float)
+	for(int i = 0; i < d_naligned; i++) {
+	  fft::free(d_aligned_taps[i]);
+	}
+	::free(d_aligned_taps);  // pointer table came from malloc(), not fft::malloc_*
+
+	// Free output sample
+	fft::free(d_output);
+      }
+      
+      void
+      fir_filter_fsf::set_taps(const std::vector<float> &taps)
+      {
+	// Release the previous tap set, if any, before building the new one
+	if(d_taps != NULL) {
+	  fft::free(d_taps);
+	  d_taps = NULL;
+
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  ::free(d_aligned_taps);  // allocated with malloc() below, so not fft::free
+	}
+
+	d_ntaps = (unsigned int)taps.size();
+	d_taps = fft::malloc_float(d_ntaps);
+	for(unsigned int i = 0; i < d_ntaps; i++) {
+	  d_taps[d_ntaps-i-1] = taps[i];  // store reversed for the dot product
+	}
+
+	// Copy i holds i leading zeros then the taps, for inputs i samples past alignment
+	d_aligned_taps = (float**)malloc(d_naligned*sizeof(float*));
+	for(int i = 0; i < d_naligned; i++) {
+	  d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
+	  memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
+	  memcpy(&d_aligned_taps[i][i], d_taps, sizeof(float)*(d_ntaps));
+	}
+      }
+      
+      std::vector<float>
+      fir_filter_fsf::taps() const
+      {
+	// d_taps is stored reversed; copy it back out in the caller's order.
+	std::vector<float> t(d_ntaps);
+	for(unsigned int k = 0; k < d_ntaps; k++) t[k] = d_taps[d_ntaps-k-1];
+	return t;
+      }
+
+      unsigned int
+      fir_filter_fsf::ntaps() const
+      {
+	return d_ntaps;  // tap count recorded by the last set_taps() call
+      }
+      
+      short
+      fir_filter_fsf::filter(const float input[])
+      {
+	// Round the address down to the alignment boundary; size_t (not unsigned
+	// long) so the pointer is not truncated where long is narrower than a pointer.
+	const float *ar = (float *)((size_t) input & ~(size_t)(d_align-1));
+	unsigned al = input - ar;  // samples skipped back; selects the padded tap copy
+	volk_32f_x2_dot_prod_16i_a(d_output, ar,
+				   d_aligned_taps[al],
+				   (d_ntaps+al));
+
+	//float out = 0;
+	//for(unsigned int i = 0; i < d_ntaps; i++) {
+	//  out += d_taps[i] * input[i];
+	//}
+	//*d_output = (short)out;
+	return *d_output;
+      }
+      
+      void
+      fir_filter_fsf::filterN(short output[],
+			      const float input[],
+			      unsigned long n)
+      {
+	for(unsigned long i = 0; i < n; i++)  // one output per input position
+	  output[i] = filter(&input[i]);      // window starts at input[i]
+      }
+
+      void
+      fir_filter_fsf::filterNdec(short output[],
+				 const float input[],
+				 unsigned long n,
+				 unsigned int decimate)
+      {
+	// Output k is filtered from the window that begins at input[k*decimate].
+	for(unsigned long k = 0; k < n; k++) {
+	  const unsigned long start = k * decimate;
+	  output[k] = filter(&input[start]);
+	}
+      }
+
     } /* namespace kernel */
   } /* namespace filter */
 } /* namespace gr */
diff --git a/gr-filter/python/qa_fir_filter.py b/gr-filter/python/qa_fir_filter.py
index ac20286cc..2a61498a2 100755
--- a/gr-filter/python/qa_fir_filter.py
+++ b/gr-filter/python/qa_fir_filter.py
@@ -218,6 +218,101 @@ class test_filter(gr_unittest.TestCase):
         self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5)
 
 
+    def test_fir_filter_scc_001(self):  # short source -> fir_filter_scc -> complex sink, decimation 1
+        src_data = 40*[1, 2, 3, 4]  # 160 input samples
+        expected_data = ((0.5+1j), (1.5+3j), (3+6j), (5+10j), (5.5+11j),
+                         (6.5+13j), (8+16j), (10+20j), (10.5+21j), (11.5+23j),
+                         (13+26j), (15+30j), (15.5+31j), (16.5+33j), (18+36j),
+                         (20+40j), (20.5+41j), (21.5+43j), (23+46j), (25+50j),
+                         (25.5+51j), (26.5+53j), (28+56j), (30+60j), (30.5+61j),
+                         (31.5+63j), (33+66j), (35+70j), (35.5+71j), (36.5+73j),
+                         (38+76j), (40+80j), (40.5+81j), (41.5+83j), (43+86j),
+                         (45+90j), (45.5+91j), (46.5+93j), (48+96j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j))
+        src = gr.vector_source_s(src_data)
+        op  = filter.fir_filter_scc(1, 20*[0.5+1j, 0.5+1j])  # 40 equal taps of 0.5+1j
+        dst = gr.vector_sink_c()
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = dst.data()
+        self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5)
+
+
+    def test_fir_filter_scc_002(self):  # same chain as scc_001 but with decimation 4
+        src_data = 40*[1, 2, 3, 4]  # 160 input samples
+        expected_data = ((0.5+1j), (5.5+11j), (10.5+21j), (15.5+31j), (20.5+41j),
+                         (25.5+51j), (30.5+61j), (35.5+71j), (40.5+81j), (45.5+91j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j),
+                         (50+100j), (50+100j), (50+100j), (50+100j), (50+100j))
+        src = gr.vector_source_s(src_data)
+        op  = filter.fir_filter_scc(4, 20*[0.5+1j, 0.5+1j])  # 40 equal taps of 0.5+1j
+        dst = gr.vector_sink_c()
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = dst.data()
+        self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5)
+
+    def test_fir_filter_fsf_001(self):  # float source -> fir_filter_fsf -> short sink, decimation 1
+        src_data = 40*[1, 2, 3, 4]  # 160 input samples
+        expected_data =(0, 1, 3, 5, 5, 6, 8, 10, 10, 11, 13, 15, 15, 16, 18, 20, 20,
+                        21, 23, 25, 25, 26, 28, 30, 30, 31, 33, 35, 35, 36, 38, 40, 40,
+                        41, 43, 45, 45, 46, 48, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                        50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50)
+        src = gr.vector_source_f(src_data)
+        op  = filter.fir_filter_fsf(1, 20*[0.5, 0.5])  # 40 equal taps of 0.5
+        dst = gr.vector_sink_s()
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = dst.data()
+        self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5)  # complex-tuple helper applied to integer data
+
+
+    def test_fir_filter_fsf_002(self):  # same chain as fsf_001 but with decimation 4
+        src_data = 40*[1, 2, 3, 4]  # 160 input samples
+        expected_data = (0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 50, 50, 50, 50,
+                         50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+                         50, 50, 50, 50, 50, 50, 50, 50, 50, 50)
+        src = gr.vector_source_f(src_data)
+        op  = filter.fir_filter_fsf(4, 20*[0.5, 0.5])  # 40 equal taps of 0.5
+        dst = gr.vector_sink_s()
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = dst.data()
+        self.assertComplexTuplesAlmostEqual(expected_data, result_data, 5)  # complex-tuple helper applied to integer data
+
 if __name__ == '__main__':
     gr_unittest.run(test_filter, "test_filter.xml")
 
diff --git a/gr-filter/swig/filter_swig.i b/gr-filter/swig/filter_swig.i
index cc15b5722..c9de3fb9a 100644
--- a/gr-filter/swig/filter_swig.i
+++ b/gr-filter/swig/filter_swig.i
@@ -36,6 +36,8 @@
 #include "filter/fir_filter_fff.h"
 #include "filter/fir_filter_ccf.h"
 #include "filter/fir_filter_ccc.h"
+#include "filter/fir_filter_scc.h"
+#include "filter/fir_filter_fsf.h"
 #include "filter/fft_filter_ccc.h"
 #include "filter/fft_filter_fff.h"
 #include "filter/hilbert_fc.h"
@@ -50,6 +52,8 @@
 %include "filter/fir_filter_fff.h"
 %include "filter/fir_filter_ccf.h"
 %include "filter/fir_filter_ccc.h"
+%include "filter/fir_filter_scc.h"
+%include "filter/fir_filter_fsf.h"
 %include "filter/fft_filter_ccc.h"
 %include "filter/fft_filter_fff.h"
 %include "filter/hilbert_fc.h"
@@ -61,6 +65,8 @@ GR_SWIG_BLOCK_MAGIC2(filter, filter_delay_fc);
 GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_fff);
 GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_ccf);
 GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_ccc);
+GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_scc);
+GR_SWIG_BLOCK_MAGIC2(filter, fir_filter_fsf);
 GR_SWIG_BLOCK_MAGIC2(filter, fft_filter_ccc);
 GR_SWIG_BLOCK_MAGIC2(filter, fft_filter_fff);
 GR_SWIG_BLOCK_MAGIC2(filter, hilbert_fc);