summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Rondeau 2012-06-14 12:26:45 -0400
committer Tom Rondeau 2012-06-14 12:26:45 -0400
commit dae3b39098c16573f9c70e77f1a9a6b09ecfd041 (patch)
tree 4e4a30212bd0da649c464d515f3040028527b112
parent c24cabd47e2aaadf279c19aec73b311dd7ddce6c (diff)
download gnuradio-dae3b39098c16573f9c70e77f1a9a6b09ecfd041.tar.gz
gnuradio-dae3b39098c16573f9c70e77f1a9a6b09ecfd041.tar.bz2
gnuradio-dae3b39098c16573f9c70e77f1a9a6b09ecfd041.zip
filter: fff FIR filters use VOLK machine information to set alignment.
Works with SSE and AVX machines. SSE results comparable to float_dotprod still. AVX slightly faster.
-rw-r--r-- gr-filter/include/filter/fir_filter.h 2
-rw-r--r-- gr-filter/lib/fir_filter.cc 19
2 files changed, 13 insertions, 8 deletions
diff --git a/gr-filter/include/filter/fir_filter.h b/gr-filter/include/filter/fir_filter.h
index 7047ba1a4..99acb7a0a 100644
--- a/gr-filter/include/filter/fir_filter.h
+++ b/gr-filter/include/filter/fir_filter.h
@@ -56,6 +56,8 @@ namespace gr {
float *d_taps;
float **d_aligned_taps;
float *d_output;
+ int d_align;
+ int d_naligned;
};
/**************************************************************/
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 30c8325f7..1e5990294 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -33,6 +33,9 @@ namespace gr {
fir_filter_fff::fir_filter_fff(int decimation,
const std::vector<float> &taps)
{
+ d_align = volk_get_alignment();
+ d_naligned = d_align / sizeof(float);
+
d_taps = NULL;
set_taps(taps);
@@ -49,7 +52,7 @@ namespace gr {
}
// Free all aligned taps
- for(int i = 0; i < 4; i++) {
+ for(int i = 0; i < d_naligned; i++) {
fft::free(d_aligned_taps[i]);
}
fft::free(d_aligned_taps);
@@ -66,7 +69,7 @@ namespace gr {
fft::free(d_taps);
d_taps = NULL;
- for(int i = 0; i < 4; i++) {
+ for(int i = 0; i < d_naligned; i++) {
fft::free(d_aligned_taps[i]);
}
fft::free(d_aligned_taps);
@@ -79,10 +82,10 @@ namespace gr {
}
// Make a set of taps at all possible arch alignments
- d_aligned_taps = (float**)malloc(4*sizeof(float**));
- for(int i = 0; i < 4; i++) {
- d_aligned_taps[i] = fft::malloc_float(d_ntaps+3);
- memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+3));
+ d_aligned_taps = (float**)malloc(d_naligned*sizeof(float**));
+ for(int i = 0; i < d_naligned; i++) {
+ d_aligned_taps[i] = fft::malloc_float(d_ntaps+d_naligned-1);
+ memset(d_aligned_taps[i], 0, sizeof(float)*(d_ntaps+d_naligned-1));
memcpy(&d_aligned_taps[i][i], d_taps, sizeof(float)*(d_ntaps));
}
}
@@ -105,12 +108,12 @@ namespace gr {
float
fir_filter_fff::filter(const float input[])
{
- const float *ar = (float *)((unsigned long) input & ~15);
+ const float *ar = (float *)((unsigned long) input & ~(d_align-1));
unsigned al = input - ar;
volk_32f_x2_dot_prod_32f_a(d_output, ar,
d_aligned_taps[al],
- (d_ntaps + al - 1) / 4 + 1);
+ d_ntaps+al);
return *d_output;
}