filter: fixed fir_filter_with_buffer and added QA code to check.

author: Tom Rondeau 2012-06-17 20:35:53 -0400
committer: Tom Rondeau 2012-06-17 20:35:53 -0400
commit: 6b2dbab570adceb3a7fa29f298da24a5e53cbf64 (patch)
tree: b42adb8bd5ba973bada96d12077790b19d6c61e7
parent: 765d82d839e1773c77556e5d73ab229cbe6b96dc (diff)
download: gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.gz
gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.bz2
gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.zip
7 files changed, 604 insertions, 43 deletions
diff --git a/gr-filter/include/filter/fir_filter_with_buffer.h b/gr-filter/include/filter/fir_filter_with_buffer.h
index 2ccb74906..8b5d9e064 100644
--- a/gr-filter/include/filter/fir_filter_with_buffer.h
+++ b/gr-filter/include/filter/fir_filter_with_buffer.h
@@ -42,7 +42,7 @@ namespace gr {
 	unsigned int  d_ntaps;
 	float        *d_buffer;
 	unsigned int  d_idx;
-	float        *d_aligned_taps;
+	float       **d_aligned_taps;
 	float        *d_output;
 	int           d_align;
 	int           d_naligned;
@@ -138,7 +138,7 @@ namespace gr {
 	unsigned int  d_ntaps;
 	gr_complex   *d_buffer;
 	unsigned int  d_idx;
-	gr_complex   *d_aligned_taps;
+	gr_complex  **d_aligned_taps;
 	gr_complex   *d_output;
 	int           d_align;
 	int           d_naligned;
@@ -234,7 +234,7 @@ namespace gr {
 	unsigned int  d_ntaps;
 	gr_complex   *d_buffer;
 	unsigned int  d_idx;
-	float        *d_aligned_taps;
+	float       **d_aligned_taps;
 	gr_complex   *d_output;
 	int           d_align;
 	int           d_naligned;
diff --git a/gr-filter/lib/CMakeLists.txt b/gr-filter/lib/CMakeLists.txt
index 4d950230a..97afe7139 100644
--- a/gr-filter/lib/CMakeLists.txt
+++ b/gr-filter/lib/CMakeLists.txt
@@ -150,6 +150,7 @@ list(APPEND test_gr_filter_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/test_gr_filter.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/qa_filter.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/qa_firdes.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/qa_fir_filter_with_buffer.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/qa_mmse_fir_interpolator_cc.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/qa_mmse_fir_interpolator_ff.cc
 )
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 7ca1fae19..8e585f245 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -24,7 +24,6 @@
 #include <fft/fft.h>
 #include <volk/volk.h>
 #include <cstdio>
-#include <float_dotprod_x86.h>
 
 namespace gr {
   namespace filter {
diff --git a/gr-filter/lib/fir_filter_with_buffer.cc b/gr-filter/lib/fir_filter_with_buffer.cc
index b1cc589a5..58ef67fb7 100644
--- a/gr-filter/lib/fir_filter_with_buffer.cc
+++ b/gr-filter/lib/fir_filter_with_buffer.cc
@@ -28,6 +28,7 @@
 #include <fft/fft.h>
 #include <volk/volk.h>
 #include <algorithm>
+#include <cstdio>
 
 namespace gr {
   namespace filter {
@@ -54,8 +55,13 @@ namespace gr {
 	}
 	
 	// Free aligned taps
-	fft::free(d_aligned_taps);
-	d_aligned_taps = NULL;
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
+	}
 
 	// Free output sample
 	fft::free(d_output);
@@ -71,20 +77,27 @@ namespace gr {
 
 	// Free the taps if already allocated
 	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
 	  fft::free(d_aligned_taps);
 	  d_aligned_taps = NULL;
 	}
 
-	d_buffer = fft::malloc_float(d_ntaps);
-
 	d_ntaps = (int)taps.size();
 	d_taps = taps;
 	std::reverse(d_taps.begin(), d_taps.end());
 
+	d_buffer = fft::malloc_float(2*d_ntaps);
+	memset(d_buffer, 0, 2*d_ntaps*sizeof(float));
+
 	// Allocate aligned taps
-	d_aligned_taps = fft::malloc_float(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_aligned_taps[i] = d_taps[i];
+	d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
+	for(int i = 0; i < d_naligned; i++) {
+	  d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
+	  memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
 
 	d_idx = 0;
@@ -108,9 +121,12 @@ namespace gr {
 	if(d_idx >= ntaps())
 	  d_idx = 0;
 
-	volk_32f_x2_dot_prod_32f_a(d_output, d_buffer,
-				   d_aligned_taps,
-				   ntaps());
+	const float *ar = (float*)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+	unsigned al = (&d_buffer[d_idx]) - ar;
+
+	volk_32f_x2_dot_prod_32f_a(d_output, ar,
+				   d_aligned_taps[al],
+				   ntaps()+al);
 	return *d_output;
       }
 
@@ -128,9 +144,12 @@ namespace gr {
 	    d_idx = 0;
 	}
 
-	volk_32f_x2_dot_prod_32f_a(d_output, d_buffer,
-				   d_aligned_taps,
-				   ntaps());
+	const float *ar = (float*)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+	unsigned al = (&d_buffer[d_idx]) - ar;
+
+	volk_32f_x2_dot_prod_32f_a(d_output, ar,
+				   d_aligned_taps[al],
+				   ntaps()+al);
 	return *d_output;
       }
 
@@ -182,8 +201,13 @@ namespace gr {
 	}
 	
 	// Free aligned taps
-	fft::free(d_aligned_taps);
-	d_aligned_taps = NULL;
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
+	}
 
 	// Free output sample
 	fft::free(d_output);
@@ -199,20 +223,27 @@ namespace gr {
 
 	// Free the taps if already allocated
 	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
 	  fft::free(d_aligned_taps);
 	  d_aligned_taps = NULL;
 	}
 
-	d_buffer = fft::malloc_complex(d_ntaps);
-
 	d_ntaps = (int)taps.size();
 	d_taps = taps;
 	std::reverse(d_taps.begin(), d_taps.end());
 
+	d_buffer = fft::malloc_complex(2*d_ntaps);
+	memset(d_buffer, 0, 2*d_ntaps*sizeof(gr_complex));
+
 	// Allocate aligned taps
-	d_aligned_taps = fft::malloc_complex(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_aligned_taps[i] = d_taps[i];
+	d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**));
+	for(int i = 0; i < d_naligned; i++) {
+	  d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
+	  memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
 
 	d_idx = 0;
@@ -236,9 +267,12 @@ namespace gr {
 	if(d_idx >= ntaps())
 	  d_idx = 0;
 
-	volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer,
-				     d_aligned_taps,
-				     ntaps());
+	const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+	unsigned al = (&d_buffer[d_idx]) - ar;
+
+	volk_32fc_x2_dot_prod_32fc_a(d_output, ar,
+				     d_aligned_taps[al],
+				     (ntaps()+al)*sizeof(gr_complex));
 	return *d_output;
       }
 
@@ -256,9 +290,12 @@ namespace gr {
 	    d_idx = 0;
 	}
 
-	volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer,
-				     d_aligned_taps,
-				     ntaps());
+	const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+	unsigned al = (&d_buffer[d_idx]) - ar;
+
+	volk_32fc_x2_dot_prod_32fc_a(d_output, ar,
+				     d_aligned_taps[al],
+				     (ntaps()+al)*sizeof(gr_complex));
 	return *d_output;
       }
 
@@ -310,8 +347,13 @@ namespace gr {
 	}
 	
 	// Free aligned taps
-	fft::free(d_aligned_taps);
-	d_aligned_taps = NULL;
+	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
+	  fft::free(d_aligned_taps);
+	  d_aligned_taps = NULL;
+	}
 
 	// Free output sample
 	fft::free(d_output);
@@ -327,20 +369,27 @@ namespace gr {
 
 	// Free the taps if already allocated
 	if(d_aligned_taps != NULL) {
+	  for(int i = 0; i < d_naligned; i++) {
+	    fft::free(d_aligned_taps[i]);
+	  }
 	  fft::free(d_aligned_taps);
 	  d_aligned_taps = NULL;
 	}
 
-	d_buffer = fft::malloc_complex(d_ntaps);
-
 	d_ntaps = (int)taps.size();
 	d_taps = taps;
 	std::reverse(d_taps.begin(), d_taps.end());
 
+	d_buffer = fft::malloc_complex(2*d_ntaps);
+	memset(d_buffer, 0, 2*d_ntaps*sizeof(gr_complex));
+
 	// Allocate aligned taps
-	d_aligned_taps = fft::malloc_float(d_ntaps);
-	for(unsigned int i = 0; i < d_ntaps; i++) {
-	  d_aligned_taps[i] = d_taps[i];
+	d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
+	for(int i = 0; i < d_naligned; i++) {
+	  d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
+	  memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
+	  for(unsigned int j = 0; j < d_ntaps; j++)
+	    d_aligned_taps[i][i+j] = d_taps[j];
 	}
 
 	d_idx = 0;
@@ -364,9 +413,12 @@ namespace gr {
 	if(d_idx >= ntaps())
 	  d_idx = 0;
 
-	volk_32fc_32f_dot_prod_32fc_a(d_output, d_buffer,
-				      d_aligned_taps,
-				      ntaps());
+	const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+	unsigned al = (&d_buffer[d_idx]) - ar;
+
+	volk_32fc_32f_dot_prod_32fc_a(d_output, ar,
+				      d_aligned_taps[al],
+				      ntaps()+al);
 	return *d_output;
       }
 
@@ -384,9 +436,12 @@ namespace gr {
 	    d_idx = 0;
 	}
 
-	volk_32fc_32f_dot_prod_32fc_a(d_output, d_buffer,
-				      d_aligned_taps,
-				      ntaps());
+	const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+	unsigned al = (&d_buffer[d_idx]) - ar;
+
+	volk_32fc_32f_dot_prod_32fc_a(d_output, ar,
+				      d_aligned_taps[al],
+				      ntaps()+al);
 	return *d_output;
       }
 
diff --git a/gr-filter/lib/qa_filter.cc b/gr-filter/lib/qa_filter.cc
index 8aa24651c..b7760b19e 100644
--- a/gr-filter/lib/qa_filter.cc
+++ b/gr-filter/lib/qa_filter.cc
@@ -27,6 +27,7 @@
 
 #include <qa_filter.h>
 #include <qa_firdes.h>
+#include <qa_fir_filter_with_buffer.h>
 #include <qa_mmse_fir_interpolator_cc.h>
 #include <qa_mmse_fir_interpolator_ff.h>
 
@@ -36,6 +37,9 @@ qa_gr_filter::suite ()
   CppUnit::TestSuite *s = new CppUnit::TestSuite ("gr-filter");
 
   s->addTest(gr::filter::qa_firdes::suite());
+  s->addTest(gr::filter::fff::qa_fir_filter_with_buffer_fff::suite());
+  s->addTest(gr::filter::ccc::qa_fir_filter_with_buffer_ccc::suite());
+  s->addTest(gr::filter::ccf::qa_fir_filter_with_buffer_ccf::suite());
   s->addTest(gr::filter::qa_mmse_fir_interpolator_cc::suite());
   s->addTest(gr::filter::qa_mmse_fir_interpolator_ff::suite());
 
diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.cc b/gr-filter/lib/qa_fir_filter_with_buffer.cc
new file mode 100644
index 000000000..c97e2479c
--- /dev/null
+++ b/gr-filter/lib/qa_fir_filter_with_buffer.cc
@@ -0,0 +1,408 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <gr_types.h>
+#include <qa_fir_filter_with_buffer.h>
+#include <filter/fir_filter_with_buffer.h>
+#include <fft/fft.h>
+#include <cppunit/TestAssert.h>
+#include <cmath>
+#include <random.h>
+
+
+namespace gr {
+  namespace filter {
+
+#define MAX_DATA        (16383)
+#define	ERR_DELTA	(1e-5)
+
+#define	NELEM(x) (sizeof(x) / sizeof(x[0]))
+
+    static float
+    uniform() 
+    {
+      return 2.0 * ((float) random() / RANDOM_MAX - 0.5); // uniformly (-1, 1)
+    }
+
+    static void
+    random_floats(float *buf, unsigned n)
+    {
+      for(unsigned i = 0; i < n; i++)
+	buf[i] = (float)rint(uniform() * 32767);
+    }
+
+    static void
+    random_complex(gr_complex *buf, unsigned n)
+    {
+      for(unsigned i = 0; i < n; i++) {
+	float re = rint(uniform () * MAX_DATA);
+	float im = rint(uniform () * MAX_DATA);
+	buf[i] = gr_complex(re, im);
+      }
+    }
+
+    namespace fff {
+
+      typedef float i_type;
+      typedef float o_type;
+      typedef float tap_type;
+      typedef float acc_type;
+
+      using std::vector;
+
+      static o_type
+      ref_dotprod(const i_type input[], const tap_type taps[], int ntaps)
+      {
+	acc_type sum = 0;
+	for (int i = 0; i < ntaps; i++) {
+	  sum += input[i] * taps[i];
+	}
+	return sum;
+      }
+
+      void
+      qa_fir_filter_with_buffer_fff::t1()
+      {
+	test_decimate(1);
+      }
+
+      void
+      qa_fir_filter_with_buffer_fff::t2()
+      {
+	test_decimate(2);
+      }
+
+      void
+      qa_fir_filter_with_buffer_fff::t3()
+      {
+	test_decimate(5);
+      }
+
+      //
+      // Test for ntaps in [0,9], and input lengths in [0,17].
+      // This ensures that we are building the shifted taps correctly,
+      // and exercises all corner cases on input alignment and length.
+      //
+      void
+      qa_fir_filter_with_buffer_fff::test_decimate(unsigned int decimate)
+      {
+	const int MAX_TAPS   = 29;
+	const int OUTPUT_LEN = 37;
+	const int INPUT_LEN  = MAX_TAPS + OUTPUT_LEN;
+
+	// Mem aligned buffer not really necessary, but why not?
+	i_type   *input = fft::malloc_float(INPUT_LEN);
+	i_type   *dline = fft::malloc_float(INPUT_LEN);
+	o_type   *expected_output = fft::malloc_float(OUTPUT_LEN);
+	o_type   *actual_output = fft::malloc_float(OUTPUT_LEN);
+	tap_type *taps = fft::malloc_float(MAX_TAPS);
+
+	srandom(0);	// we want reproducibility
+	memset(dline, 0, INPUT_LEN*sizeof(i_type));
+
+	for(int n = 0; n <= MAX_TAPS; n++) {
+	  for(int ol = 0; ol <= OUTPUT_LEN; ol++) {
+
+	    // build random test case
+	    random_floats(input, INPUT_LEN);
+	    random_floats(taps, MAX_TAPS);
+
+	    // compute expected output values
+	    memset(dline, 0, INPUT_LEN*sizeof(i_type));
+	    for(int o = 0; o < (int)(ol/decimate); o++) {
+	      // use an actual delay line for this test
+	      for(int dd = 0; dd < (int)decimate; dd++) {
+		for(int oo = INPUT_LEN-1; oo > 0; oo--)
+		  dline[oo] = dline[oo-1];
+		dline[0] = input[decimate*o+dd];
+	      }
+	      expected_output[o] = ref_dotprod(dline, taps, n);
+	    }
+
+	    // build filter
+	    vector<tap_type> f1_taps(&taps[0], &taps[n]);
+	    kernel::fir_filter_with_buffer_fff *f1 = \
+	      new kernel::fir_filter_with_buffer_fff(f1_taps);
+
+	    // zero the output, then do the filtering
+	    memset(actual_output, 0, sizeof(actual_output));
+	    f1->filterNdec(actual_output, input, ol/decimate, decimate);
+
+	    // check results
+	    //
+	    // we use a sloppy error margin because on the x86 architecture,
+	    // our reference implementation is using 80 bit floating point
+	    // arithmetic, while the SSE version is using 32 bit float point
+	    // arithmetic.
+
+	    for(int o = 0; o < (int)(ol/decimate); o++) {
+	      CPPUNIT_ASSERT_DOUBLES_EQUAL(expected_output[o], actual_output[o],
+			     sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA);
+	    }
+	    delete f1;
+	  }
+	}
+	fft::free(input);
+	fft::free(dline);
+	fft::free(expected_output);
+	fft::free(actual_output);
+	fft::free(taps);
+      }
+
+    } /* namespace fff */
+
+
+    /**************************************************************/
+
+
+    namespace ccc {
+
+      typedef gr_complex i_type;
+      typedef gr_complex o_type;
+      typedef gr_complex tap_type;
+      typedef gr_complex acc_type;
+
+      using std::vector;
+
+      static o_type
+      ref_dotprod(const i_type input[], const tap_type taps[], int ntaps)
+      {
+	acc_type sum = 0;
+	for(int i = 0; i < ntaps; i++) {
+	  sum += input[i] * taps[i];
+	}
+
+	return sum;
+      }
+
+      void
+      qa_fir_filter_with_buffer_ccc::t1()
+      {
+	test_decimate(1);
+      }
+
+      void
+      qa_fir_filter_with_buffer_ccc::t2()
+      {
+	test_decimate(2);
+      }
+
+      void
+      qa_fir_filter_with_buffer_ccc::t3()
+      {
+	test_decimate(5);
+      }
+
+      //
+      // Test for ntaps in [0,9], and input lengths in [0,17].
+      // This ensures that we are building the shifted taps correctly,
+      // and exercises all corner cases on input alignment and length.
+      //
+      void
+      qa_fir_filter_with_buffer_ccc::test_decimate(unsigned int decimate)
+      {
+	const int MAX_TAPS   = 29;
+	const int OUTPUT_LEN = 37;
+	const int INPUT_LEN  = MAX_TAPS + OUTPUT_LEN;
+
+	// Mem aligned buffer not really necessary, but why not?
+	i_type   *input = fft::malloc_complex(INPUT_LEN);
+	i_type   *dline = fft::malloc_complex(INPUT_LEN);
+	o_type   *expected_output = fft::malloc_complex(OUTPUT_LEN);
+	o_type   *actual_output = fft::malloc_complex(OUTPUT_LEN);
+	tap_type *taps = fft::malloc_complex(MAX_TAPS);
+
+	srandom(0);	// we want reproducibility
+	memset(dline, 0, INPUT_LEN*sizeof(i_type));
+
+	for(int n = 0; n <= MAX_TAPS; n++) {
+	  for(int ol = 0; ol <= OUTPUT_LEN; ol++) {
+
+	    // build random test case
+	    random_complex(input, INPUT_LEN);
+	    random_complex(taps, MAX_TAPS);
+
+	    // compute expected output values
+	    memset(dline, 0, INPUT_LEN*sizeof(i_type));
+	    for(int o = 0; o < (int)(ol/decimate); o++) {
+	      // use an actual delay line for this test
+	      for(int dd = 0; dd < (int)decimate; dd++) {
+		for(int oo = INPUT_LEN-1; oo > 0; oo--)
+		  dline[oo] = dline[oo-1];
+		dline[0] = input[decimate*o+dd];
+	      }
+	      expected_output[o] = ref_dotprod(dline, taps, n);
+	    }
+
+	    // build filter
+	    vector<tap_type> f1_taps(&taps[0], &taps[n]);
+	    kernel::fir_filter_with_buffer_ccc *f1 = \
+	      new kernel::fir_filter_with_buffer_ccc(f1_taps);
+
+	    // zero the output, then do the filtering
+	    memset(actual_output, 0, sizeof(actual_output));
+	    f1->filterNdec(actual_output, input, ol/decimate, decimate);
+
+	    // check results
+	    //
+	    // we use a sloppy error margin because on the x86 architecture,
+	    // our reference implementation is using 80 bit floating point
+	    // arithmetic, while the SSE version is using 32 bit float point
+	    // arithmetic.
+
+	    for(int o = 0; o < (int)(ol/decimate); o++) {
+	      CPPUNIT_ASSERT_COMPLEXES_EQUAL(expected_output[o], actual_output[o],
+			      sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA);
+	    }
+	    delete f1;
+	  }
+	}
+	fft::free(input);
+	fft::free(dline);
+	fft::free(expected_output);
+	fft::free(actual_output);
+	fft::free(taps);
+      }
+
+    } /* namespace ccc */
+
+
+    /**************************************************************/
+
+    namespace ccf {
+
+      typedef gr_complex i_type;
+      typedef gr_complex o_type;
+      typedef float      tap_type;
+      typedef gr_complex acc_type;
+
+      using std::vector;
+
+      static o_type
+      ref_dotprod(const i_type input[], const tap_type taps[], int ntaps)
+      {
+	acc_type sum = 0;
+	for(int i = 0; i < ntaps; i++) {
+	  sum += input[i] * taps[i];
+	}
+
+	//return gr_complex(121,9)*sum; 
+	return sum;
+     }
+
+      void
+      qa_fir_filter_with_buffer_ccf::t1()
+      {
+	test_decimate(1);
+      }
+
+      void
+      qa_fir_filter_with_buffer_ccf::t2()
+      {
+	test_decimate(2);
+      }
+
+      void
+      qa_fir_filter_with_buffer_ccf::t3()
+      {
+	test_decimate(5);
+      }
+
+      //
+      // Test for ntaps in [0,9], and input lengths in [0,17].
+      // This ensures that we are building the shifted taps correctly,
+      // and exercises all corner cases on input alignment and length.
+      //
+      void
+      qa_fir_filter_with_buffer_ccf::test_decimate(unsigned int decimate)
+      {
+	const int MAX_TAPS   = 29;
+	const int OUTPUT_LEN = 37;
+	const int INPUT_LEN  = MAX_TAPS + OUTPUT_LEN;
+
+	// Mem aligned buffer not really necessary, but why not?
+	i_type   *input = fft::malloc_complex(INPUT_LEN);
+	i_type   *dline = fft::malloc_complex(INPUT_LEN);
+	o_type   *expected_output = fft::malloc_complex(OUTPUT_LEN);
+	o_type   *actual_output = fft::malloc_complex(OUTPUT_LEN);
+	tap_type *taps = fft::malloc_float(MAX_TAPS);
+
+	srandom(0);	// we want reproducibility
+	memset(dline, 0, INPUT_LEN*sizeof(i_type));
+
+	for(int n = 0; n <= MAX_TAPS; n++) {
+	  for(int ol = 0; ol <= OUTPUT_LEN; ol++) {
+
+	    // build random test case
+	    random_complex(input, INPUT_LEN);
+	    random_floats(taps, MAX_TAPS);
+
+	    // compute expected output values
+	    memset(dline, 0, INPUT_LEN*sizeof(i_type));
+	    for(int o = 0; o < (int)(ol/decimate); o++) {
+	      // use an actual delay line for this test
+	      for(int dd = 0; dd < (int)decimate; dd++) {
+		for(int oo = INPUT_LEN-1; oo > 0; oo--)
+		  dline[oo] = dline[oo-1];
+		dline[0] = input[decimate*o+dd];
+	      }
+	      expected_output[o] = ref_dotprod(dline, taps, n);
+	    }
+
+	    // build filter
+	    vector<tap_type> f1_taps(&taps[0], &taps[n]);
+	    kernel::fir_filter_with_buffer_ccf *f1 = \
+	      new kernel::fir_filter_with_buffer_ccf(f1_taps);
+
+	    // zero the output, then do the filtering
+	    memset(actual_output, 0, sizeof(actual_output));
+	    f1->filterNdec(actual_output, input, ol/decimate, decimate);
+
+	    // check results
+	    //
+	    // we use a sloppy error margin because on the x86 architecture,
+	    // our reference implementation is using 80 bit floating point
+	    // arithmetic, while the SSE version is using 32 bit float point
+	    // arithmetic.
+
+	    for(int o = 0; o < (int)(ol/decimate); o++) {
+	      CPPUNIT_ASSERT_COMPLEXES_EQUAL(expected_output[o], actual_output[o],
+			      sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA);
+	    }
+	    delete f1;
+	  }
+	}
+	fft::free(input);
+	fft::free(dline);
+	fft::free(expected_output);
+	fft::free(actual_output);
+	fft::free(taps);
+      }
+
+    } /* namespace ccf */
+
+  } /* namespace filter */
+} /* namespace gr */
diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.h b/gr-filter/lib/qa_fir_filter_with_buffer.h
new file mode 100644
index 000000000..8850ada20
--- /dev/null
+++ b/gr-filter/lib/qa_fir_filter_with_buffer.h
@@ -0,0 +1,94 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+#ifndef _QA_FIR_FILTER_WITH_BUFFER_H_
+#define _QA_FIR_FILTER_WITH_BUFFER_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+namespace gr {
+  namespace filter {
+
+    namespace fff {
+
+      class qa_fir_filter_with_buffer_fff : public CppUnit::TestCase 
+      {
+	CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_fff);
+	CPPUNIT_TEST(t1);
+	CPPUNIT_TEST(t2);
+	CPPUNIT_TEST(t3);
+	CPPUNIT_TEST_SUITE_END();
+
+      private:
+	void test_decimate(unsigned int decimate);
+
+	void t1();
+	void t2();
+	void t3();
+      };
+
+    } /* namespace fff */
+
+    namespace ccc {
+
+      class qa_fir_filter_with_buffer_ccc : public CppUnit::TestCase 
+      {
+	CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_ccc);
+	CPPUNIT_TEST(t1);
+	CPPUNIT_TEST(t2);
+	CPPUNIT_TEST(t3);
+	CPPUNIT_TEST_SUITE_END();
+
+      private:
+	void test_decimate(unsigned int decimate);
+
+	void t1();
+	void t2();
+	void t3();
+      };
+
+    } /* namespace ccc */
+
+    namespace ccf {
+
+      class qa_fir_filter_with_buffer_ccf : public CppUnit::TestCase 
+      {
+	CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_ccf);
+	CPPUNIT_TEST(t1);
+	CPPUNIT_TEST(t2);
+	CPPUNIT_TEST(t3);
+	CPPUNIT_TEST_SUITE_END();
+
+      private:
+	void test_decimate(unsigned int decimate);
+
+	void t1();
+	void t2();
+	void t3();
+      };
+
+    } /* namespace ccf */
+
+  } /* namespace filter */
+} /* namespace gr */
+
+#endif /* _QA_FIR_FILTER_WITH_BUFFER_H_ */
author	Tom Rondeau	2012-06-17 20:35:53 -0400
committer	Tom Rondeau	2012-06-17 20:35:53 -0400
commit	6b2dbab570adceb3a7fa29f298da24a5e53cbf64 (patch)
tree	b42adb8bd5ba973bada96d12077790b19d6c61e7
parent	765d82d839e1773c77556e5d73ab229cbe6b96dc (diff)
download	gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.gz gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.bz2 gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.zip