diff options
author | Tom Rondeau | 2012-06-17 20:35:53 -0400 |
---|---|---|
committer | Tom Rondeau | 2012-06-17 20:35:53 -0400 |
commit | 6b2dbab570adceb3a7fa29f298da24a5e53cbf64 (patch) | |
tree | b42adb8bd5ba973bada96d12077790b19d6c61e7 /gr-filter | |
parent | 765d82d839e1773c77556e5d73ab229cbe6b96dc (diff) | |
download | gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.gz gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.tar.bz2 gnuradio-6b2dbab570adceb3a7fa29f298da24a5e53cbf64.zip |
filter: fixed fir_filter_with_buffer and added QA code to check.
Diffstat (limited to 'gr-filter')
-rw-r--r-- | gr-filter/include/filter/fir_filter_with_buffer.h | 6 | ||||
-rw-r--r-- | gr-filter/lib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter.cc | 1 | ||||
-rw-r--r-- | gr-filter/lib/fir_filter_with_buffer.cc | 133 | ||||
-rw-r--r-- | gr-filter/lib/qa_filter.cc | 4 | ||||
-rw-r--r-- | gr-filter/lib/qa_fir_filter_with_buffer.cc | 408 | ||||
-rw-r--r-- | gr-filter/lib/qa_fir_filter_with_buffer.h | 94 |
7 files changed, 604 insertions, 43 deletions
diff --git a/gr-filter/include/filter/fir_filter_with_buffer.h b/gr-filter/include/filter/fir_filter_with_buffer.h index 2ccb74906..8b5d9e064 100644 --- a/gr-filter/include/filter/fir_filter_with_buffer.h +++ b/gr-filter/include/filter/fir_filter_with_buffer.h @@ -42,7 +42,7 @@ namespace gr { unsigned int d_ntaps; float *d_buffer; unsigned int d_idx; - float *d_aligned_taps; + float **d_aligned_taps; float *d_output; int d_align; int d_naligned; @@ -138,7 +138,7 @@ namespace gr { unsigned int d_ntaps; gr_complex *d_buffer; unsigned int d_idx; - gr_complex *d_aligned_taps; + gr_complex **d_aligned_taps; gr_complex *d_output; int d_align; int d_naligned; @@ -234,7 +234,7 @@ namespace gr { unsigned int d_ntaps; gr_complex *d_buffer; unsigned int d_idx; - float *d_aligned_taps; + float **d_aligned_taps; gr_complex *d_output; int d_align; int d_naligned; diff --git a/gr-filter/lib/CMakeLists.txt b/gr-filter/lib/CMakeLists.txt index 4d950230a..97afe7139 100644 --- a/gr-filter/lib/CMakeLists.txt +++ b/gr-filter/lib/CMakeLists.txt @@ -150,6 +150,7 @@ list(APPEND test_gr_filter_sources ${CMAKE_CURRENT_SOURCE_DIR}/test_gr_filter.cc ${CMAKE_CURRENT_SOURCE_DIR}/qa_filter.cc ${CMAKE_CURRENT_SOURCE_DIR}/qa_firdes.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_fir_filter_with_buffer.cc ${CMAKE_CURRENT_SOURCE_DIR}/qa_mmse_fir_interpolator_cc.cc ${CMAKE_CURRENT_SOURCE_DIR}/qa_mmse_fir_interpolator_ff.cc ) diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc index 7ca1fae19..8e585f245 100644 --- a/gr-filter/lib/fir_filter.cc +++ b/gr-filter/lib/fir_filter.cc @@ -24,7 +24,6 @@ #include <fft/fft.h> #include <volk/volk.h> #include <cstdio> -#include <float_dotprod_x86.h> namespace gr { namespace filter { diff --git a/gr-filter/lib/fir_filter_with_buffer.cc b/gr-filter/lib/fir_filter_with_buffer.cc index b1cc589a5..58ef67fb7 100644 --- a/gr-filter/lib/fir_filter_with_buffer.cc +++ b/gr-filter/lib/fir_filter_with_buffer.cc @@ -28,6 +28,7 @@ #include <fft/fft.h> #include <volk/volk.h> #include <algorithm> +#include <cstdio> namespace gr { namespace filter { @@ -54,8 +55,13 @@ namespace gr { } // Free aligned taps - fft::free(d_aligned_taps); - d_aligned_taps = NULL; + if(d_aligned_taps != NULL) { + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + d_aligned_taps = NULL; + } // Free output sample fft::free(d_output); @@ -71,20 +77,27 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } fft::free(d_aligned_taps); d_aligned_taps = NULL; } - d_buffer = fft::malloc_float(d_ntaps); - d_ntaps = (int)taps.size(); d_taps = taps; std::reverse(d_taps.begin(), d_taps.end()); + d_buffer = fft::malloc_float(2*d_ntaps); + memset(d_buffer, 0, 2*d_ntaps*sizeof(float)); + // Allocate aligned taps - d_aligned_taps = fft::malloc_float(d_ntaps); - for(unsigned int i = 0; i < d_ntaps; i++) { - d_aligned_taps[i] = d_taps[i]; + d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**)); + for(int i = 0; i < d_naligned; i++) { + d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); + for(unsigned int j = 0; j < d_ntaps; j++) + d_aligned_taps[i][i+j] = d_taps[j]; } d_idx = 0; @@ -108,9 +121,12 @@ namespace gr { if(d_idx >= ntaps()) d_idx = 0; - volk_32f_x2_dot_prod_32f_a(d_output, d_buffer, - d_aligned_taps, - ntaps()); + const float *ar = (float*)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1)); + unsigned al = (&d_buffer[d_idx]) - ar; + + volk_32f_x2_dot_prod_32f_a(d_output, ar, + d_aligned_taps[al], + ntaps()+al); return *d_output; } @@ -128,9 +144,12 @@ namespace gr { d_idx = 0; } - volk_32f_x2_dot_prod_32f_a(d_output, d_buffer, - d_aligned_taps, - ntaps()); + const float *ar = (float*)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1)); + unsigned al = (&d_buffer[d_idx]) - ar; + + volk_32f_x2_dot_prod_32f_a(d_output, ar, + d_aligned_taps[al], + ntaps()+al); return *d_output; } @@ -182,8 +201,13 @@ namespace gr { } // Free aligned taps - fft::free(d_aligned_taps); - d_aligned_taps = NULL; + if(d_aligned_taps != NULL) { + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + d_aligned_taps = NULL; + } // Free output sample fft::free(d_output); @@ -199,20 +223,27 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } fft::free(d_aligned_taps); d_aligned_taps = NULL; } - d_buffer = fft::malloc_complex(d_ntaps); - d_ntaps = (int)taps.size(); d_taps = taps; std::reverse(d_taps.begin(), d_taps.end()); + d_buffer = fft::malloc_complex(2*d_ntaps); + memset(d_buffer, 0, 2*d_ntaps*sizeof(gr_complex)); + // Allocate aligned taps - d_aligned_taps = fft::malloc_complex(d_ntaps); - for(unsigned int i = 0; i < d_ntaps; i++) { - d_aligned_taps[i] = d_taps[i]; + d_aligned_taps = (gr_complex**)malloc(d_naligned*sizeof(gr_complex**)); + for(int i = 0; i < d_naligned; i++) { + d_aligned_taps[i] = fft::malloc_complex(d_ntaps+d_naligned-1); + memset(d_aligned_taps[i], 0, sizeof(gr_complex)*(d_ntaps+d_naligned-1)); + for(unsigned int j = 0; j < d_ntaps; j++) + d_aligned_taps[i][i+j] = d_taps[j]; } d_idx = 0; @@ -236,9 +267,12 @@ namespace gr { if(d_idx >= ntaps()) d_idx = 0; - volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer, - d_aligned_taps, - ntaps()); + const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1)); + unsigned al = (&d_buffer[d_idx]) - ar; + + volk_32fc_x2_dot_prod_32fc_a(d_output, ar, + d_aligned_taps[al], + (ntaps()+al)*sizeof(gr_complex)); return *d_output; } @@ -256,9 +290,12 @@ namespace gr { d_idx = 0; } - volk_32fc_x2_dot_prod_32fc_a(d_output, d_buffer, - d_aligned_taps, - ntaps()); + const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1)); + unsigned al = (&d_buffer[d_idx]) - ar; + + volk_32fc_x2_dot_prod_32fc_a(d_output, ar, + d_aligned_taps[al], + (ntaps()+al)*sizeof(gr_complex)); return *d_output; } @@ -310,8 +347,13 @@ namespace gr { } // Free aligned taps - fft::free(d_aligned_taps); - d_aligned_taps = NULL; + if(d_aligned_taps != NULL) { + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } + fft::free(d_aligned_taps); + d_aligned_taps = NULL; + } // Free output sample fft::free(d_output); @@ -327,20 +369,27 @@ namespace gr { // Free the taps if already allocated if(d_aligned_taps != NULL) { + for(int i = 0; i < d_naligned; i++) { + fft::free(d_aligned_taps[i]); + } fft::free(d_aligned_taps); d_aligned_taps = NULL; } - d_buffer = fft::malloc_complex(d_ntaps); - d_ntaps = (int)taps.size(); d_taps = taps; std::reverse(d_taps.begin(), d_taps.end()); + d_buffer = fft::malloc_complex(2*d_ntaps); + memset(d_buffer, 0, 2*d_ntaps*sizeof(gr_complex)); + // Allocate aligned taps - d_aligned_taps = fft::malloc_float(d_ntaps); - for(unsigned int i = 0; i < d_ntaps; i++) { - d_aligned_taps[i] = d_taps[i]; + d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**)); + for(int i = 0; i < d_naligned; i++) { + d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1); + memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1)); + for(unsigned int j = 0; j < d_ntaps; j++) + d_aligned_taps[i][i+j] = d_taps[j]; } d_idx = 0; @@ -364,9 +413,12 @@ namespace gr { if(d_idx >= ntaps()) d_idx = 0; - volk_32fc_32f_dot_prod_32fc_a(d_output, d_buffer, - d_aligned_taps, - ntaps()); + const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1)); + unsigned al = (&d_buffer[d_idx]) - ar; + + volk_32fc_32f_dot_prod_32fc_a(d_output, ar, + d_aligned_taps[al], + ntaps()+al); return *d_output; } @@ -384,9 +436,12 @@ namespace gr { d_idx = 0; } - volk_32fc_32f_dot_prod_32fc_a(d_output, d_buffer, - d_aligned_taps, - ntaps()); + const gr_complex *ar = (gr_complex *)((unsigned long)(&d_buffer[d_idx]) & ~(d_align-1)); + unsigned al = (&d_buffer[d_idx]) - ar; + + volk_32fc_32f_dot_prod_32fc_a(d_output, ar, + d_aligned_taps[al], + ntaps()+al); return *d_output; } diff --git a/gr-filter/lib/qa_filter.cc b/gr-filter/lib/qa_filter.cc index 8aa24651c..b7760b19e 100644 --- a/gr-filter/lib/qa_filter.cc +++ b/gr-filter/lib/qa_filter.cc @@ -27,6 +27,7 @@ #include <qa_filter.h> #include <qa_firdes.h> +#include <qa_fir_filter_with_buffer.h> #include <qa_mmse_fir_interpolator_cc.h> #include <qa_mmse_fir_interpolator_ff.h> @@ -36,6 +37,9 @@ qa_gr_filter::suite () CppUnit::TestSuite *s = new CppUnit::TestSuite ("gr-filter"); s->addTest(gr::filter::qa_firdes::suite()); + s->addTest(gr::filter::fff::qa_fir_filter_with_buffer_fff::suite()); + s->addTest(gr::filter::ccc::qa_fir_filter_with_buffer_ccc::suite()); + s->addTest(gr::filter::ccf::qa_fir_filter_with_buffer_ccf::suite()); s->addTest(gr::filter::qa_mmse_fir_interpolator_cc::suite()); s->addTest(gr::filter::qa_mmse_fir_interpolator_ff::suite()); diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.cc b/gr-filter/lib/qa_fir_filter_with_buffer.cc new file mode 100644 index 000000000..c97e2479c --- /dev/null +++ b/gr-filter/lib/qa_fir_filter_with_buffer.cc @@ -0,0 +1,408 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <gr_types.h> +#include <qa_fir_filter_with_buffer.h> +#include <filter/fir_filter_with_buffer.h> +#include <fft/fft.h> +#include <cppunit/TestAssert.h> +#include <cmath> +#include <random.h> + + +namespace gr { + namespace filter { + +#define MAX_DATA (16383) +#define ERR_DELTA (1e-5) + +#define NELEM(x) (sizeof(x) / sizeof(x[0])) + + static float + uniform() + { + return 2.0 * ((float) random() / RANDOM_MAX - 0.5); // uniformly (-1, 1) + } + + static void + random_floats(float *buf, unsigned n) + { + for(unsigned i = 0; i < n; i++) + buf[i] = (float)rint(uniform() * 32767); + } + + static void + random_complex(gr_complex *buf, unsigned n) + { + for(unsigned i = 0; i < n; i++) { + float re = rint(uniform () * MAX_DATA); + float im = rint(uniform () * MAX_DATA); + buf[i] = gr_complex(re, im); + } + } + + namespace fff { + + typedef float i_type; + typedef float o_type; + typedef float tap_type; + typedef float acc_type; + + using std::vector; + + static o_type + ref_dotprod(const i_type input[], const tap_type taps[], int ntaps) + { + acc_type sum = 0; + for (int i = 0; i < ntaps; i++) { + sum += input[i] * taps[i]; + } + return sum; + } + + void + qa_fir_filter_with_buffer_fff::t1() + { + test_decimate(1); + } + + void + qa_fir_filter_with_buffer_fff::t2() + { + test_decimate(2); + } + + void + qa_fir_filter_with_buffer_fff::t3() + { + test_decimate(5); + } + + // + // Test for ntaps in [0,9], and input lengths in [0,17]. + // This ensures that we are building the shifted taps correctly, + // and exercises all corner cases on input alignment and length. + // + void + qa_fir_filter_with_buffer_fff::test_decimate(unsigned int decimate) + { + const int MAX_TAPS = 29; + const int OUTPUT_LEN = 37; + const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN; + + // Mem aligned buffer not really necessary, but why not? + i_type *input = fft::malloc_float(INPUT_LEN); + i_type *dline = fft::malloc_float(INPUT_LEN); + o_type *expected_output = fft::malloc_float(OUTPUT_LEN); + o_type *actual_output = fft::malloc_float(OUTPUT_LEN); + tap_type *taps = fft::malloc_float(MAX_TAPS); + + srandom(0); // we want reproducibility + memset(dline, 0, INPUT_LEN*sizeof(i_type)); + + for(int n = 0; n <= MAX_TAPS; n++) { + for(int ol = 0; ol <= OUTPUT_LEN; ol++) { + + // build random test case + random_floats(input, INPUT_LEN); + random_floats(taps, MAX_TAPS); + + // compute expected output values + memset(dline, 0, INPUT_LEN*sizeof(i_type)); + for(int o = 0; o < (int)(ol/decimate); o++) { + // use an actual delay line for this test + for(int dd = 0; dd < (int)decimate; dd++) { + for(int oo = INPUT_LEN-1; oo > 0; oo--) + dline[oo] = dline[oo-1]; + dline[0] = input[decimate*o+dd]; + } + expected_output[o] = ref_dotprod(dline, taps, n); + } + + // build filter + vector<tap_type> f1_taps(&taps[0], &taps[n]); + kernel::fir_filter_with_buffer_fff *f1 = \ + new kernel::fir_filter_with_buffer_fff(f1_taps); + + // zero the output, then do the filtering + memset(actual_output, 0, sizeof(actual_output)); + f1->filterNdec(actual_output, input, ol/decimate, decimate); + + // check results + // + // we use a sloppy error margin because on the x86 architecture, + // our reference implementation is using 80 bit floating point + // arithmetic, while the SSE version is using 32 bit float point + // arithmetic. + + for(int o = 0; o < (int)(ol/decimate); o++) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(expected_output[o], actual_output[o], + sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA); + } + delete f1; + } + } + fft::free(input); + fft::free(dline); + fft::free(expected_output); + fft::free(actual_output); + fft::free(taps); + } + + } /* namespace fff */ + + + /**************************************************************/ + + + namespace ccc { + + typedef gr_complex i_type; + typedef gr_complex o_type; + typedef gr_complex tap_type; + typedef gr_complex acc_type; + + using std::vector; + + static o_type + ref_dotprod(const i_type input[], const tap_type taps[], int ntaps) + { + acc_type sum = 0; + for(int i = 0; i < ntaps; i++) { + sum += input[i] * taps[i]; + } + + return sum; + } + + void + qa_fir_filter_with_buffer_ccc::t1() + { + test_decimate(1); + } + + void + qa_fir_filter_with_buffer_ccc::t2() + { + test_decimate(2); + } + + void + qa_fir_filter_with_buffer_ccc::t3() + { + test_decimate(5); + } + + // + // Test for ntaps in [0,9], and input lengths in [0,17]. + // This ensures that we are building the shifted taps correctly, + // and exercises all corner cases on input alignment and length. + // + void + qa_fir_filter_with_buffer_ccc::test_decimate(unsigned int decimate) + { + const int MAX_TAPS = 29; + const int OUTPUT_LEN = 37; + const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN; + + // Mem aligned buffer not really necessary, but why not? + i_type *input = fft::malloc_complex(INPUT_LEN); + i_type *dline = fft::malloc_complex(INPUT_LEN); + o_type *expected_output = fft::malloc_complex(OUTPUT_LEN); + o_type *actual_output = fft::malloc_complex(OUTPUT_LEN); + tap_type *taps = fft::malloc_complex(MAX_TAPS); + + srandom(0); // we want reproducibility + memset(dline, 0, INPUT_LEN*sizeof(i_type)); + + for(int n = 0; n <= MAX_TAPS; n++) { + for(int ol = 0; ol <= OUTPUT_LEN; ol++) { + + // build random test case + random_complex(input, INPUT_LEN); + random_complex(taps, MAX_TAPS); + + // compute expected output values + memset(dline, 0, INPUT_LEN*sizeof(i_type)); + for(int o = 0; o < (int)(ol/decimate); o++) { + // use an actual delay line for this test + for(int dd = 0; dd < (int)decimate; dd++) { + for(int oo = INPUT_LEN-1; oo > 0; oo--) + dline[oo] = dline[oo-1]; + dline[0] = input[decimate*o+dd]; + } + expected_output[o] = ref_dotprod(dline, taps, n); + } + + // build filter + vector<tap_type> f1_taps(&taps[0], &taps[n]); + kernel::fir_filter_with_buffer_ccc *f1 = \ + new kernel::fir_filter_with_buffer_ccc(f1_taps); + + // zero the output, then do the filtering + memset(actual_output, 0, sizeof(actual_output)); + f1->filterNdec(actual_output, input, ol/decimate, decimate); + + // check results + // + // we use a sloppy error margin because on the x86 architecture, + // our reference implementation is using 80 bit floating point + // arithmetic, while the SSE version is using 32 bit float point + // arithmetic. + + for(int o = 0; o < (int)(ol/decimate); o++) { + CPPUNIT_ASSERT_COMPLEXES_EQUAL(expected_output[o], actual_output[o], + sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA); + } + delete f1; + } + } + fft::free(input); + fft::free(dline); + fft::free(expected_output); + fft::free(actual_output); + fft::free(taps); + } + + } /* namespace ccc */ + + + /**************************************************************/ + + namespace ccf { + + typedef gr_complex i_type; + typedef gr_complex o_type; + typedef float tap_type; + typedef gr_complex acc_type; + + using std::vector; + + static o_type + ref_dotprod(const i_type input[], const tap_type taps[], int ntaps) + { + acc_type sum = 0; + for(int i = 0; i < ntaps; i++) { + sum += input[i] * taps[i]; + } + + //return gr_complex(121,9)*sum; + return sum; + } + + void + qa_fir_filter_with_buffer_ccf::t1() + { + test_decimate(1); + } + + void + qa_fir_filter_with_buffer_ccf::t2() + { + test_decimate(2); + } + + void + qa_fir_filter_with_buffer_ccf::t3() + { + test_decimate(5); + } + + // + // Test for ntaps in [0,9], and input lengths in [0,17]. + // This ensures that we are building the shifted taps correctly, + // and exercises all corner cases on input alignment and length. + // + void + qa_fir_filter_with_buffer_ccf::test_decimate(unsigned int decimate) + { + const int MAX_TAPS = 29; + const int OUTPUT_LEN = 37; + const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN; + + // Mem aligned buffer not really necessary, but why not? + i_type *input = fft::malloc_complex(INPUT_LEN); + i_type *dline = fft::malloc_complex(INPUT_LEN); + o_type *expected_output = fft::malloc_complex(OUTPUT_LEN); + o_type *actual_output = fft::malloc_complex(OUTPUT_LEN); + tap_type *taps = fft::malloc_float(MAX_TAPS); + + srandom(0); // we want reproducibility + memset(dline, 0, INPUT_LEN*sizeof(i_type)); + + for(int n = 0; n <= MAX_TAPS; n++) { + for(int ol = 0; ol <= OUTPUT_LEN; ol++) { + + // build random test case + random_complex(input, INPUT_LEN); + random_floats(taps, MAX_TAPS); + + // compute expected output values + memset(dline, 0, INPUT_LEN*sizeof(i_type)); + for(int o = 0; o < (int)(ol/decimate); o++) { + // use an actual delay line for this test + for(int dd = 0; dd < (int)decimate; dd++) { + for(int oo = INPUT_LEN-1; oo > 0; oo--) + dline[oo] = dline[oo-1]; + dline[0] = input[decimate*o+dd]; + } + expected_output[o] = ref_dotprod(dline, taps, n); + } + + // build filter + vector<tap_type> f1_taps(&taps[0], &taps[n]); + kernel::fir_filter_with_buffer_ccf *f1 = \ + new kernel::fir_filter_with_buffer_ccf(f1_taps); + + // zero the output, then do the filtering + memset(actual_output, 0, sizeof(actual_output)); + f1->filterNdec(actual_output, input, ol/decimate, decimate); + + // check results + // + // we use a sloppy error margin because on the x86 architecture, + // our reference implementation is using 80 bit floating point + // arithmetic, while the SSE version is using 32 bit float point + // arithmetic. + + for(int o = 0; o < (int)(ol/decimate); o++) { + CPPUNIT_ASSERT_COMPLEXES_EQUAL(expected_output[o], actual_output[o], + sqrt((float)n)*0.25*MAX_DATA*MAX_DATA * ERR_DELTA); + } + delete f1; + } + } + fft::free(input); + fft::free(dline); + fft::free(expected_output); + fft::free(actual_output); + fft::free(taps); + } + + } /* namespace ccf */ + + } /* namespace filter */ +} /* namespace gr */ diff --git a/gr-filter/lib/qa_fir_filter_with_buffer.h b/gr-filter/lib/qa_fir_filter_with_buffer.h new file mode 100644 index 000000000..8850ada20 --- /dev/null +++ b/gr-filter/lib/qa_fir_filter_with_buffer.h @@ -0,0 +1,94 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ +#ifndef _QA_FIR_FILTER_WITH_BUFFER_H_ +#define _QA_FIR_FILTER_WITH_BUFFER_H_ + +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/TestCase.h> + +namespace gr { + namespace filter { + + namespace fff { + + class qa_fir_filter_with_buffer_fff : public CppUnit::TestCase + { + CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_fff); + CPPUNIT_TEST(t1); + CPPUNIT_TEST(t2); + CPPUNIT_TEST(t3); + CPPUNIT_TEST_SUITE_END(); + + private: + void test_decimate(unsigned int decimate); + + void t1(); + void t2(); + void t3(); + }; + + } /* namespace fff */ + + namespace ccc { + + class qa_fir_filter_with_buffer_ccc : public CppUnit::TestCase + { + CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_ccc); + CPPUNIT_TEST(t1); + CPPUNIT_TEST(t2); + CPPUNIT_TEST(t3); + CPPUNIT_TEST_SUITE_END(); + + private: + void test_decimate(unsigned int decimate); + + void t1(); + void t2(); + void t3(); + }; + + } /* namespace ccc */ + + namespace ccf { + + class qa_fir_filter_with_buffer_ccf : public CppUnit::TestCase + { + CPPUNIT_TEST_SUITE(qa_fir_filter_with_buffer_ccf); + CPPUNIT_TEST(t1); + CPPUNIT_TEST(t2); + CPPUNIT_TEST(t3); + CPPUNIT_TEST_SUITE_END(); + + private: + void test_decimate(unsigned int decimate); + + void t1(); + void t2(); + void t3(); + }; + + } /* namespace ccf */ + + } /* namespace filter */ +} /* namespace gr */ + +#endif /* _QA_FIR_FILTER_WITH_BUFFER_H_ */ |