summaryrefslogtreecommitdiff
path: root/gr-filter
diff options
context:
space:
mode:
authorTom Rondeau2012-06-17 20:35:53 -0400
committerTom Rondeau2012-06-17 20:35:53 -0400
commit6b2dbab570adceb3a7fa29f298da24a5e53cbf64 (patch)
treeb42adb8bd5ba973bada96d12077790b19d6c61e7 /gr-filter
parent765d82d839e1773c77556e5d73ab229cbe6b96dc (diff)
downloadgnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.gz
gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.bz2
gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.zip
filter: fixed fir_filter_with_buffer and added QA code to check.
Diffstat (limited to 'gr-filter')
-rw-r--r--gr-filter/include/filter/fir_filter_with_buffer.h6
-rw-r--r--gr-filter/lib/CMakeLists.txt1
-rw-r--r--gr-filter/lib/fir_filter.cc1
-rw-r--r--gr-filter/lib/fir_filter_with_buffer.cc133
-rw-r--r--gr-filter/lib/qa_filter.cc4
-rw-r--r--gr-filter/lib/qa_fir_filter_with_buffer.cc408
-rw-r--r--gr-filter/lib/qa_fir_filter_with_buffer.h94
7 files changed, 604 insertions, 43 deletions
diff --git a/gr-filter/include/filter/fir_filter_with_buffer.h b/gr-filter/include/filter/fir_filter_with_buffer.h
index 2ccb74906..8b5d9e064 100644
--- a/gr-filter/include/filter/fir_filter_with_buffer.h
+++ b/gr-filter/include/filter/fir_filter_with_buffer.h
@@ -42,7 +42,7 @@ namespace gr {
unsigned int d_ntaps;
float *d_buffer;
unsigned int d_idx;
- float *d_aligned_taps;
+ float **d_aligned_taps;
float *d_output;
int d_align;
int d_naligned;
@@ -138,7 +138,7 @@ namespace gr {
unsigned int d_ntaps;
gr_complex *d_buffer;
unsigned int d_idx;
- gr_complex *d_aligned_taps;
+ gr_complex **d_aligned_taps;
gr_complex *d_output;
int d_align;
int d_naligned;
@@ -234,7 +234,7 @@ namespace gr {
unsigned int d_ntaps;
gr_complex *d_buffer;
unsigned int d_idx;
- float *d_aligned_taps;
+ float **d_aligned_taps;
gr_complex *d_output;
int d_align;
int d_naligned;
diff --git a/gr-filter/lib/CMakeLists.txt b/gr-filter/lib/CMakeLists.txt
index 4d950230a..97afe7139 100644
--- a/gr-filter/lib/CMakeLists.txt
+++ b/gr-filter/lib/CMakeLists.txt
@@ -150,6 +150,7 @@ list(APPEND test_gr_filter_sources
${CMAKE_CURRENT_SOURCE_DIR}/test_gr_filter.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_filter.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_firdes.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/qa_fir_filter_with_buffer.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_mmse_fir_interpolator_cc.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_mmse_fir_interpolator_ff.cc
)
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 7ca1fae19..8e585f245 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -24,7 +24,6 @@
#include <fft/fft.h>
#include <volk/volk.h>
#include <cstdio>
-#include <float_dotprod_x86.h>
namespace gr {
namespace filter {
diff --git a/gr-filter/lib/fir_filter_with_buffer.cc b/gr-filter/lib/fir_filter_with_buffer.cc
index b1cc589a5..58ef67fb7 100644
--- a/gr-filter/lib/fir_filter_with_buffer.cc
+++ b/gr-filter/lib/fir_filter_with_buffer.cc
@@ -28,6 +28,7 @@
#include <fft/fft.h>
#include <volk/volk.h>
#include <algorithm>
+#include <cstdio>
namespace gr {
namespace filter {
@@ -54,8 +55,13 @@ namespace gr {
}
// Free aligned taps
- fft::free(d_aligned_taps);
- d_aligned_taps = NULL;
+ if(d_aligned_taps != NULL) {
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ fft::free(d_aligned_taps);
+ d_aligned_taps = NULL;
+ }
// Free output sample
fft::free(d_output);
@@ -71,20 +77,27 @@ namespace gr {
// Free the taps if already allocated
if(d_aligned_taps != NULL) {
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
fft::free(d_aligned_taps);
d_aligned_taps = NULL;
}
- d_buffer = fft::malloc_float(d_ntaps);
-
d_ntaps = (int)taps.size();
d_taps = taps;
std::reverse(d_taps.begin(), d_taps.end());
+ d_buffer = fft::malloc_float(2*d_ntaps);
+ memset(d_buffer, 0, 2*d_ntaps*sizeof(float));
+
// Allocate aligned taps
- d_aligned_taps = fft::malloc_float(d_ntaps);
- for(unsigned int i = 0; i < d_ntaps; i++) {
- d_aligned_taps[i] = d_taps[i];
+ d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
+ for(int i = 0; i < d_naligned; i++) {
+ d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
+ memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
+ for(unsigned int j = 0; j < d_ntaps; j++)
+ d_aligned_taps[i][i+j] = d_taps[j];
}
d_idx = 0;
@@ -108,9 +121,12 @@ namespace gr {
if(d_idx >= ntaps())
d_idx = 0;
- volk_32f_x2_dot_prod_32f_a(d_output, d_buffer,
- d_aligned_taps,
- ntaps());
+ const float *ar = (float*)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+ unsigned al = (&d_buffer[d_idx]) - ar;
+
+ volk_32f_x2_dot_prod_32f_a(d_output, ar,
+ d_aligned_taps[al],
+ ntaps()+al);
return *d_output;
}
@@ -128,9 +144,12 @@ namespace gr {
d_idx = 0;
}
- volk_32f_x2_dot_prod_32f_a(d_output, d_buffer,
- d_aligned_taps,
- ntaps());
+ const float *ar = (float*)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+ unsigned al = (&d_buffer[d_idx]) - ar;
+
+ volk_32f_x2_dot_prod_32f_a(d_output, ar,
+ d_aligned_taps[al],
+ ntaps()+al);
return *d_output;
}
@@ -182,8 +201,13 @@ namespace gr {
}
// Free aligned taps
- fft::free(d_aligned_taps);
- d_aligned_taps = NULL;
+ if(d_aligned_taps != NULL) {
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ fft::free(d_aligned_taps);
+ d_aligned_taps = NULL;
+ }
// Free output sample
fft::free(d_output);
@@ -199,20 +223,27 @@ namespace gr {
// Free the taps if already allocated
if(d_aligned_taps != NULL) {
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
fft::free(d_aligned_taps);
d_aligned_taps = NULL;
}
- d_buffer = fft::malloc_complex(d_ntaps);
-
d_ntaps = (int)taps.size();
d_taps = taps;
std::reverse(d_taps.begin(), d_taps.end());
+ d_buffer = fft::malloc_complex(2*d_ntaps);
+ memset(d_buffer, 0, 2*d_ntaps*sizeof(gr_complex));
+
// Allocate aligned taps
- d_aligned_taps = fft::malloc_complex(d_ntaps);
- for(unsigned int i = 0; i < d_ntaps; i++) {
- d_aligned_taps[i] = d_taps[i];
+ d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**));
+ for(int i = 0; i < d_naligned; i++) {
+ d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1);
+ memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1));
+ for(unsigned int j = 0; j < d_ntaps; j++)
+ d_aligned_taps[i][i+j] = d_taps[j];
}
d_idx = 0;
@@ -236,9 +267,12 @@ namespace gr {
if(d_idx >= ntaps())
d_idx = 0;
- volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer,
- d_aligned_taps,
- ntaps());
+ const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+ unsigned al = (&d_buffer[d_idx]) - ar;
+
+ volk_32fc_x2_dot_prod_32fc_a(d_output, ar,
+ d_aligned_taps[al],
+ (ntaps()+al)*sizeof(gr_complex));
return *d_output;
}
@@ -256,9 +290,12 @@ namespace gr {
d_idx = 0;
}
- volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer,
- d_aligned_taps,
- ntaps());
+ const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+ unsigned al = (&d_buffer[d_idx]) - ar;
+
+ volk_32fc_x2_dot_prod_32fc_a(d_output, ar,
+ d_aligned_taps[al],
+ (ntaps()+al)*sizeof(gr_complex));
return *d_output;
}
@@ -310,8 +347,13 @@ namespace gr {
}
// Free aligned taps
- fft::free(d_aligned_taps);
- d_aligned_taps = NULL;
+ if(d_aligned_taps != NULL) {
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
+ fft::free(d_aligned_taps);
+ d_aligned_taps = NULL;
+ }
// Free output sample
fft::free(d_output);
@@ -327,20 +369,27 @@ namespace gr {
// Free the taps if already allocated
if(d_aligned_taps != NULL) {
+ for(int i = 0; i < d_naligned; i++) {
+ fft::free(d_aligned_taps[i]);
+ }
fft::free(d_aligned_taps);
d_aligned_taps = NULL;
}
- d_buffer = fft::malloc_complex(d_ntaps);
-
d_ntaps = (int)taps.size();
d_taps = taps;
std::reverse(d_taps.begin(), d_taps.end());
+ d_buffer = fft::malloc_complex(2*d_ntaps);
+ memset(d_buffer, 0, 2*d_ntaps*sizeof(gr_complex));
+
// Allocate aligned taps
- d_aligned_taps = fft::malloc_float(d_ntaps);
- for(unsigned int i = 0; i < d_ntaps; i++) {
- d_aligned_taps[i] = d_taps[i];
+ d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
+ for(int i = 0; i < d_naligned; i++) {
+ d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
+ memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
+ for(unsigned int j = 0; j < d_ntaps; j++)
+ d_aligned_taps[i][i+j] = d_taps[j];
}
d_idx = 0;
@@ -364,9 +413,12 @@ namespace gr {
if(d_idx >= ntaps())
d_idx = 0;
- volk_32fc_32f_dot_prod_32fc_a(d_output, d_buffer,
- d_aligned_taps,
- ntaps());
+ const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+ unsigned al = (&d_buffer[d_idx]) - ar;
+
+ volk_32fc_32f_dot_prod_32fc_a(d_output, ar,
+ d_aligned_taps[al],
+ ntaps()+al);
return *d_output;
}
@@ -384,9 +436,12 @@ namespace gr {
d_idx = 0;
}
- volk_32fc_32f_dot_prod_32fc_a(d_output, d_buffer,
- d_aligned_taps,
- ntaps());
+ const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1));
+ unsigned al = (&d_buffer[d_idx]) - ar;
+
+ volk_32fc_32f_dot_prod_32fc_a(d_output, ar,
+ d_aligned_taps[al],
+ ntaps()+al);
return *d_output;
}
diff --git a/gr-filter/lib/qa_filter.cc b/gr-filter/lib/qa_filter.cc
index 8aa24651c..b7760b19e 100644
--- a/gr-filter/lib/qa_filter.cc
+++ b/gr-filter/lib/qa_filter.cc
@@ -27,6 +27,7 @@
#include <qa_filter.h>
#include <qa_firdes.h>
+#include <qa_fir_filter_with_buffer.h>
#include <qa_mmse_fir_interpolator_cc.h>
#include <qa_mmse_fir_interpolator_ff.h>
@@ -36,6 +37,9 @@ qa_gr_filter::suite ()
CppUnit::TestSuite *s = new CppUnit::TestSuite ("gr-filter");
s->addTest(gr::filter::qa_firdes::suite());
+ s->addTest(gr::filter::fff::qa_fir_filter_with_buffer_fff::suite());
+ s->addTest(gr::filter::ccc::qa_fir_filter_with_buffer_ccc::suite());
+ s->addTest(gr::filter::ccf::qa_fir_filter_with_buffer_ccf::suite());
s->addTest(gr::filter::qa_mmse_fir_interpolator_cc::suite());
s->addTest(gr::filter::qa_mmse_fir_interpolator_ff::suite());
diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.cc b/gr-filter/lib/qa_fir_filter_with_buffer.cc
new file mode 100644
index 000000000..c97e2479c
--- /dev/null
+++ b/gr-filter/lib/qa_fir_filter_with_buffer.cc
@@ -0,0 +1,408 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <gr_types.h>
+#include <qa_fir_filter_with_buffer.h>
+#include <filter/fir_filter_with_buffer.h>
+#include <fft/fft.h>
+#include <cppunit/TestAssert.h>
+#include <cmath>
+#include <random.h>
+
+
+namespace gr {
+ namespace filter {
+
+#define MAX_DATA (16383)
+#define ERR_DELTA (1e-5)
+
+#define NELEM(x) (sizeof(x) / sizeof(x[0]))
+
+ static float
+ uniform()
+ {
+ return 2.0 * ((float) random() / RANDOM_MAX - 0.5); // uniformly (-1, 1)
+ }
+
+ static void
+ random_floats(float *buf, unsigned n)
+ {
+ for(unsigned i = 0; i < n; i++)
+ buf[i] = (float)rint(uniform() * 32767);
+ }
+
+ static void
+ random_complex(gr_complex *buf, unsigned n)
+ {
+ for(unsigned i = 0; i < n; i++) {
+ float re = rint(uniform () * MAX_DATA);
+ float im = rint(uniform () * MAX_DATA);
+ buf[i] = gr_complex(re, im);
+ }
+ }
+
+ namespace fff {
+
+ typedef float i_type;
+ typedef float o_type;
+ typedef float tap_type;
+ typedef float acc_type;
+
+ using std::vector;
+
+ static o_type
+ ref_dotprod(const i_type input[], const tap_type taps[], int ntaps)
+ {
+ acc_type sum = 0;
+ for (int i = 0; i < ntaps; i++) {
+ sum += input[i] * taps[i];
+ }
+ return sum;
+ }
+
+ void
+ qa_fir_filter_with_buffer_fff::t1()
+ {
+ test_decimate(1);
+ }
+
+ void
+ qa_fir_filter_with_buffer_fff::t2()
+ {
+ test_decimate(2);
+ }
+
+ void
+ qa_fir_filter_with_buffer_fff::t3()
+ {
+ test_decimate(5);
+ }
+
+ //
+ // Test for ntaps in [0,9], and input lengths in [0,17].
+ // This ensures that we are building the shifted taps correctly,
+ // and exercises all corner cases on input alignment and length.
+ //
+ void
+ qa_fir_filter_with_buffer_fff::test_decimate(unsigned int decimate)
+ {
+ const int MAX_TAPS = 29;
+ const int OUTPUT_LEN = 37;
+ const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN;
+
+ // Mem aligned buffer not really necessary, but why not?
+ i_type *input = fft::malloc_float(INPUT_LEN);
+ i_type *dline = fft::malloc_float(INPUT_LEN);
+ o_type *expected_output = fft::malloc_float(OUTPUT_LEN);
+ o_type *actual_output = fft::malloc_float(OUTPUT_LEN);
+ tap_type *taps = fft::malloc_float(MAX_TAPS);
+
+ srandom(0); // we want reproducibility
+ memset(dline, 0, INPUT_LEN*sizeof(i_type));
+
+ for(int n = 0; n <= MAX_TAPS; n++) {
+ for(int ol = 0; ol <= OUTPUT_LEN; ol++) {
+
+ // build random test case
+ random_floats(input, INPUT_LEN);
+ random_floats(taps, MAX_TAPS);
+
+ // compute expected output values
+ memset(dline, 0, INPUT_LEN*sizeof(i_type));
+ for(int o = 0; o < (int)(ol/decimate); o++) {
+ // use an actual delay line for this test
+ for(int dd = 0; dd < (int)decimate; dd++) {
+ for(int oo = INPUT_LEN-1; oo > 0; oo--)
+ dline[oo] = dline[oo-1];
+ dline[0] = input[decimate*o+dd];
+ }
+ expected_output[o] = ref_dotprod(dline, taps, n);
+ }
+
+ // build filter
+ vector<tap_type> f1_taps(&taps[0], &taps[n]);
+ kernel::fir_filter_with_buffer_fff *f1 = \
+ new kernel::fir_filter_with_buffer_fff(f1_taps);
+
+ // zero the output, then do the filtering
+ memset(actual_output, 0, sizeof(actual_output));
+ f1->filterNdec(actual_output, input, ol/decimate, decimate);
+
+ // check results
+ //
+ // we use a sloppy error margin because on the x86 architecture,
+ // our reference implementation is using 80 bit floating point
+ // arithmetic, while the SSE version is using 32 bit float point
+ // arithmetic.
+
+ for(int o = 0; o < (int)(ol/decimate); o++) {
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(expected_output[o], actual_output[o],
+ sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA);
+ }
+ delete f1;
+ }
+ }
+ fft::free(input);
+ fft::free(dline);
+ fft::free(expected_output);
+ fft::free(actual_output);
+ fft::free(taps);
+ }
+
+ } /* namespace fff */
+
+
+ /**************************************************************/
+
+
+ namespace ccc {
+
+ typedef gr_complex i_type;
+ typedef gr_complex o_type;
+ typedef gr_complex tap_type;
+ typedef gr_complex acc_type;
+
+ using std::vector;
+
+ static o_type
+ ref_dotprod(const i_type input[], const tap_type taps[], int ntaps)
+ {
+ acc_type sum = 0;
+ for(int i = 0; i < ntaps; i++) {
+ sum += input[i] * taps[i];
+ }
+
+ return sum;
+ }
+
+ void
+ qa_fir_filter_with_buffer_ccc::t1()
+ {
+ test_decimate(1);
+ }
+
+ void
+ qa_fir_filter_with_buffer_ccc::t2()
+ {
+ test_decimate(2);
+ }
+
+ void
+ qa_fir_filter_with_buffer_ccc::t3()
+ {
+ test_decimate(5);
+ }
+
+ //
+ // Test for ntaps in [0,9], and input lengths in [0,17].
+ // This ensures that we are building the shifted taps correctly,
+ // and exercises all corner cases on input alignment and length.
+ //
+ void
+ qa_fir_filter_with_buffer_ccc::test_decimate(unsigned int decimate)
+ {
+ const int MAX_TAPS = 29;
+ const int OUTPUT_LEN = 37;
+ const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN;
+
+ // Mem aligned buffer not really necessary, but why not?
+ i_type *input = fft::malloc_complex(INPUT_LEN);
+ i_type *dline = fft::malloc_complex(INPUT_LEN);
+ o_type *expected_output = fft::malloc_complex(OUTPUT_LEN);
+ o_type *actual_output = fft::malloc_complex(OUTPUT_LEN);
+ tap_type *taps = fft::malloc_complex(MAX_TAPS);
+
+ srandom(0); // we want reproducibility
+ memset(dline, 0, INPUT_LEN*sizeof(i_type));
+
+ for(int n = 0; n <= MAX_TAPS; n++) {
+ for(int ol = 0; ol <= OUTPUT_LEN; ol++) {
+
+ // build random test case
+ random_complex(input, INPUT_LEN);
+ random_complex(taps, MAX_TAPS);
+
+ // compute expected output values
+ memset(dline, 0, INPUT_LEN*sizeof(i_type));
+ for(int o = 0; o < (int)(ol/decimate); o++) {
+ // use an actual delay line for this test
+ for(int dd = 0; dd < (int)decimate; dd++) {
+ for(int oo = INPUT_LEN-1; oo > 0; oo--)
+ dline[oo] = dline[oo-1];
+ dline[0] = input[decimate*o+dd];
+ }
+ expected_output[o] = ref_dotprod(dline, taps, n);
+ }
+
+ // build filter
+ vector<tap_type> f1_taps(&taps[0], &taps[n]);
+ kernel::fir_filter_with_buffer_ccc *f1 = \
+ new kernel::fir_filter_with_buffer_ccc(f1_taps);
+
+ // zero the output, then do the filtering
+ memset(actual_output, 0, sizeof(actual_output));
+ f1->filterNdec(actual_output, input, ol/decimate, decimate);
+
+ // check results
+ //
+ // we use a sloppy error margin because on the x86 architecture,
+ // our reference implementation is using 80 bit floating point
+ // arithmetic, while the SSE version is using 32 bit float point
+ // arithmetic.
+
+ for(int o = 0; o < (int)(ol/decimate); o++) {
+ CPPUNIT_ASSERT_COMPLEXES_EQUAL(expected_output[o], actual_output[o],
+ sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA);
+ }
+ delete f1;
+ }
+ }
+ fft::free(input);
+ fft::free(dline);
+ fft::free(expected_output);
+ fft::free(actual_output);
+ fft::free(taps);
+ }
+
+ } /* namespace ccc */
+
+
+ /**************************************************************/
+
+ namespace ccf {
+
+ typedef gr_complex i_type;
+ typedef gr_complex o_type;
+ typedef float tap_type;
+ typedef gr_complex acc_type;
+
+ using std::vector;
+
+ static o_type
+ ref_dotprod(const i_type input[], const tap_type taps[], int ntaps)
+ {
+ acc_type sum = 0;
+ for(int i = 0; i < ntaps; i++) {
+ sum += input[i] * taps[i];
+ }
+
+ //return gr_complex(121,9)*sum;
+ return sum;
+ }
+
+ void
+ qa_fir_filter_with_buffer_ccf::t1()
+ {
+ test_decimate(1);
+ }
+
+ void
+ qa_fir_filter_with_buffer_ccf::t2()
+ {
+ test_decimate(2);
+ }
+
+ void
+ qa_fir_filter_with_buffer_ccf::t3()
+ {
+ test_decimate(5);
+ }
+
+ //
+ // Test for ntaps in [0,9], and input lengths in [0,17].
+ // This ensures that we are building the shifted taps correctly,
+ // and exercises all corner cases on input alignment and length.
+ //
+ void
+ qa_fir_filter_with_buffer_ccf::test_decimate(unsigned int decimate)
+ {
+ const int MAX_TAPS = 29;
+ const int OUTPUT_LEN = 37;
+ const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN;
+
+ // Mem aligned buffer not really necessary, but why not?
+ i_type *input = fft::malloc_complex(INPUT_LEN);
+ i_type *dline = fft::malloc_complex(INPUT_LEN);
+ o_type *expected_output = fft::malloc_complex(OUTPUT_LEN);
+ o_type *actual_output = fft::malloc_complex(OUTPUT_LEN);
+ tap_type *taps = fft::malloc_float(MAX_TAPS);
+
+ srandom(0); // we want reproducibility
+ memset(dline, 0, INPUT_LEN*sizeof(i_type));
+
+ for(int n = 0; n <= MAX_TAPS; n++) {
+ for(int ol = 0; ol <= OUTPUT_LEN; ol++) {
+
+ // build random test case
+ random_complex(input, INPUT_LEN);
+ random_floats(taps, MAX_TAPS);
+
+ // compute expected output values
+ memset(dline, 0, INPUT_LEN*sizeof(i_type));
+ for(int o = 0; o < (int)(ol/decimate); o++) {
+ // use an actual delay line for this test
+ for(int dd = 0; dd < (int)decimate; dd++) {
+ for(int oo = INPUT_LEN-1; oo > 0; oo--)
+ dline[oo] = dline[oo-1];
+ dline[0] = input[decimate*o+dd];
+ }
+ expected_output[o] = ref_dotprod(dline, taps, n);
+ }
+
+ // build filter
+ vector<tap_type> f1_taps(&taps[0], &taps[n]);
+ kernel::fir_filter_with_buffer_ccf *f1 = \
+ new kernel::fir_filter_with_buffer_ccf(f1_taps);
+
+ // zero the output, then do the filtering
+ memset(actual_output, 0, sizeof(actual_output));
+ f1->filterNdec(actual_output, input, ol/decimate, decimate);
+
+ // check results
+ //
+ // we use a sloppy error margin because on the x86 architecture,
+ // our reference implementation is using 80 bit floating point
+ // arithmetic, while the SSE version is using 32 bit float point
+ // arithmetic.
+
+ for(int o = 0; o < (int)(ol/decimate); o++) {
+ CPPUNIT_ASSERT_COMPLEXES_EQUAL(expected_output[o], actual_output[o],
+ sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA);
+ }
+ delete f1;
+ }
+ }
+ fft::free(input);
+ fft::free(dline);
+ fft::free(expected_output);
+ fft::free(actual_output);
+ fft::free(taps);
+ }
+
+ } /* namespace ccf */
+
+ } /* namespace filter */
+} /* namespace gr */
diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.h b/gr-filter/lib/qa_fir_filter_with_buffer.h
new file mode 100644
index 000000000..8850ada20
--- /dev/null
+++ b/gr-filter/lib/qa_fir_filter_with_buffer.h
@@ -0,0 +1,94 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+#ifndef _QA_FIR_FILTER_WITH_BUFFER_H_
+#define _QA_FIR_FILTER_WITH_BUFFER_H_
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+namespace gr {
+ namespace filter {
+
+ namespace fff {
+
+ class qa_fir_filter_with_buffer_fff : public CppUnit::TestCase
+ {
+ CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_fff);
+ CPPUNIT_TEST(t1);
+ CPPUNIT_TEST(t2);
+ CPPUNIT_TEST(t3);
+ CPPUNIT_TEST_SUITE_END();
+
+ private:
+ void test_decimate(unsigned int decimate);
+
+ void t1();
+ void t2();
+ void t3();
+ };
+
+ } /* namespace fff */
+
+ namespace ccc {
+
+ class qa_fir_filter_with_buffer_ccc : public CppUnit::TestCase
+ {
+ CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_ccc);
+ CPPUNIT_TEST(t1);
+ CPPUNIT_TEST(t2);
+ CPPUNIT_TEST(t3);
+ CPPUNIT_TEST_SUITE_END();
+
+ private:
+ void test_decimate(unsigned int decimate);
+
+ void t1();
+ void t2();
+ void t3();
+ };
+
+ } /* namespace ccc */
+
+ namespace ccf {
+
+ class qa_fir_filter_with_buffer_ccf : public CppUnit::TestCase
+ {
+ CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_ccf);
+ CPPUNIT_TEST(t1);
+ CPPUNIT_TEST(t2);
+ CPPUNIT_TEST(t3);
+ CPPUNIT_TEST_SUITE_END();
+
+ private:
+ void test_decimate(unsigned int decimate);
+
+ void t1();
+ void t2();
+ void t3();
+ };
+
+ } /* namespace ccf */
+
+ } /* namespace filter */
+} /* namespace gr */
+
+#endif /* _QA_FIR_FILTER_WITH_BUFFER_H_ */