summary refs log tree commit diff
path: root/gr-filter/lib
diff options
context:
space:
mode:
author Tom Rondeau 2012-06-13 14:53:41 -0400
committer Tom Rondeau 2012-06-13 14:53:41 -0400
commit 9e2e896e9d4dbc4627702cde82a48e9ee5136f26 (patch)
tree fb62fd58ec90f34e080d1dc2f52b2200b57fa0ed /gr-filter/lib
parent 46bd64dc8231402535bad742a74250599e595148 (diff)
download gnuradio-9e2e896e9d4dbc4627702cde82a48e9ee5136f26.tar.gz
gnuradio-9e2e896e9d4dbc4627702cde82a48e9ee5136f26.tar.bz2
gnuradio-9e2e896e9d4dbc4627702cde82a48e9ee5136f26.zip
filter: process 4 vectors each time in volk dot_prod to speed up fir filters.
This makes the volk version of the SSE FIR filter the same speed as using the hand-crafted float_dotprod from before.
Diffstat (limited to 'gr-filter/lib')
-rw-r--r-- gr-filter/lib/fir_filter.cc 38
1 files changed, 35 insertions, 3 deletions
diff --git a/gr-filter/lib/fir_filter.cc b/gr-filter/lib/fir_filter.cc
index 098dd8d36..c6e179246 100644
--- a/gr-filter/lib/fir_filter.cc
+++ b/gr-filter/lib/fir_filter.cc
@@ -24,6 +24,7 @@
#include <fft/fft.h>
#include <volk/volk.h>
#include <cstdio>
+#include <float_dotprod_x86.h>
namespace gr {
namespace filter {
@@ -101,12 +102,16 @@ namespace gr {
{
return d_ntaps;
}
-
+
+ /*
float
fir_filter_fff::filter(const float input[])
{
volk_32f_x2_dot_prod_32f_a(d_output, input,
- d_aligned_taps[d_offset], d_ntaps+3);
+ d_aligned_taps[d_offset],
+ (d_ntaps + d_offset - 1) / 4 + 1);
+ //*d_output = float_dotprod_sse(input, d_aligned_taps[d_offset],
+ // (d_ntaps + d_offset - 1) / 4 + 1);
return *d_output;
}
@@ -126,7 +131,34 @@ namespace gr {
j += (d_offset == 0 ? 4 : 0);
}
}
+ */
+
+ // Compute a single filter output for the samples starting at input.
+ //
+ // The address of input is rounded down to a 16-byte boundary and that
+ // rounded-down pointer is what gets handed to volk's "_a" dot-product
+ // kernel (presumably the aligned-input variant -- confirm against the
+ // volk documentation). The number of floats between the rounded-down
+ // pointer and input selects which entry of d_aligned_taps is used.
+ //
+ // Returns the value the kernel stored in *d_output.
+ float
+ fir_filter_fff::filter(const float input[])
+ {
+   // Do the address arithmetic in std::size_t rather than unsigned long:
+   // unsigned long is only 32 bits wide on LLP64 targets (e.g. 64-bit
+   // Windows), so casting a pointer to it would truncate the address.
+   const std::size_t addr = reinterpret_cast<std::size_t>(input);
+   const float *aligned =
+     reinterpret_cast<const float *>(addr & ~static_cast<std::size_t>(15));
+
+   // Offset of input past the 16-byte boundary, counted in floats.
+   const unsigned al = static_cast<unsigned>(input - aligned);
+
+   // NOTE(review): (d_ntaps + al - 1) / 4 + 1 looks like a count of
+   // 4-float groups rather than a count of floats -- confirm this is the
+   // length unit volk_32f_x2_dot_prod_32f_a expects.
+   volk_32f_x2_dot_prod_32f_a(d_output, aligned,
+                              d_aligned_taps[al],
+                              (d_ntaps + al - 1) / 4 + 1);
+   return *d_output;
+ }
+ // Produce n output samples: the k-th output is filter() applied to the
+ // input samples starting at input[k].
+ void
+ fir_filter_fff::filterN(float output[],
+                         const float input[],
+                         unsigned long n)
+ {
+   const float *window = input;
+   float *out = output;
+   for(unsigned long remaining = n; remaining != 0; --remaining) {
+     *out++ = filter(window++);
+   }
+ }
void
fir_filter_fff::filterNdec(float output[],
@@ -136,7 +168,7 @@ namespace gr {
{
unsigned long j = 0;
for(unsigned long i = 0; i < n; i++) {
- filterN(&output[i], &input[j], 1);
+ output[i] = filter(&input[j]);
j += decimate;
}
}