diff options
9 files changed, 48 insertions, 19 deletions
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc index c894d62aa..1a9273af0 100644 --- a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc @@ -45,6 +45,7 @@ gri_fft_filter_ccc_generic::~gri_fft_filter_ccc_generic () { delete d_fwdfft; delete d_invfft; + gri_fft_free(d_xformed_taps); } #if 0 @@ -115,7 +116,7 @@ gri_fft_filter_ccc_generic::compute_sizes(int ntaps) delete d_invfft; d_fwdfft = new gri_fft_complex(d_fftsize, true, d_nthreads); d_invfft = new gri_fft_complex(d_fftsize, false, d_nthreads); - d_xformed_taps.resize(d_fftsize); + d_xformed_taps = gri_fft_malloc_complex(d_fftsize); } } @@ -152,7 +153,7 @@ gri_fft_filter_ccc_generic::filter (int nitems, const gr_complex *input, gr_comp d_fwdfft->execute(); // compute fwd xform gr_complex *a = d_fwdfft->get_outbuf(); - gr_complex *b = &d_xformed_taps[0]; + gr_complex *b = d_xformed_taps; gr_complex *c = d_invfft->get_inbuf(); volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize); diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h index 217b9ab83..899b59e03 100644 --- a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.h @@ -44,8 +44,8 @@ class GR_CORE_API gri_fft_filter_ccc_generic gri_fft_complex *d_invfft; // inverse "plan" int d_nthreads; // number of FFTW threads to use std::vector<gr_complex> d_tail; // state carried between blocks for overlap-add - std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps std::vector<gr_complex> d_new_taps; + gr_complex *d_xformed_taps; // Fourier xformed taps void compute_sizes(int ntaps); int tailsize() const { return d_ntaps - 1; } diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc index e7f66b714..0989c9621 100644 --- a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc @@ -44,6 +44,7 @@ gri_fft_filter_fff_generic::~gri_fft_filter_fff_generic () { delete d_fwdfft; delete d_invfft; + gri_fft_free(d_xformed_taps); } /* @@ -102,7 +103,7 @@ gri_fft_filter_fff_generic::compute_sizes(int ntaps) delete d_invfft; d_fwdfft = new gri_fft_real_fwd(d_fftsize); d_invfft = new gri_fft_real_rev(d_fftsize); - d_xformed_taps.resize(d_fftsize/2+1); + d_xformed_taps = gri_fft_malloc_complex(d_fftsize/2+1); } } @@ -139,7 +140,7 @@ gri_fft_filter_fff_generic::filter (int nitems, const float *input, float *outpu d_fwdfft->execute(); // compute fwd xform gr_complex *a = d_fwdfft->get_outbuf(); - gr_complex *b = &d_xformed_taps[0]; + gr_complex *b = d_xformed_taps; gr_complex *c = d_invfft->get_inbuf(); volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize/2+1); diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h index be31068aa..6ac30cef5 100644 --- a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.h @@ -41,8 +41,8 @@ class GR_CORE_API gri_fft_filter_fff_generic gri_fft_real_rev *d_invfft; // inverse "plan" int d_nthreads; // number of FFTW threads to use std::vector<float> d_tail; // state carried between blocks for overlap-add - std::vector<gr_complex> d_xformed_taps; // Fourier xformed taps std::vector<float> d_new_taps; + gr_complex *d_xformed_taps; // Fourier xformed taps void compute_sizes(int ntaps); diff --git a/gnuradio-core/src/lib/general/gri_fft.cc b/gnuradio-core/src/lib/general/gri_fft.cc index 63e307776..2edb5f5aa 100644 --- a/gnuradio-core/src/lib/general/gri_fft.cc +++ b/gnuradio-core/src/lib/general/gri_fft.cc @@ -47,6 +47,24 @@ static int my_fftw_read_char(void *f) { return fgetc((FILE *) f); } #include <boost/filesystem/path.hpp> namespace fs = boost::filesystem; +gr_complex * +gri_fft_malloc_complex(int size) +{ + return (gr_complex*)fftwf_malloc(sizeof(gr_complex)*size); +} + +float * +gri_fft_malloc_float(int size) +{ + return (float*)fftwf_malloc(sizeof(float)*size); +} + +void +gri_fft_free(void *b) +{ + fftwf_free(b); +} + boost::mutex & gri_fft_planner::mutex() { diff --git a/gnuradio-core/src/lib/general/gri_fft.h b/gnuradio-core/src/lib/general/gri_fft.h index ed80badf1..f77a18e52 100644 --- a/gnuradio-core/src/lib/general/gri_fft.h +++ b/gnuradio-core/src/lib/general/gri_fft.h @@ -30,6 +30,19 @@ #include <gr_complex.h> #include <boost/thread.hpp> +/*! \brief Helper function for allocating complex fft buffers + */ +gr_complex* gri_fft_malloc_complex(int size); + +/*! \brief Helper function for allocating float fft buffers + */ +float* gri_fft_malloc_float(int size); + +/*! \brief Helper function for freeing fft buffers + */ +void gri_fft_free(void *b); + + /*! * \brief Export reference to planner mutex for those apps that * want to use FFTW w/o using the gri_fftw* classes. diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h index 8f2fc791e..15fa282fb 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h @@ -22,7 +22,7 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; - float min_val = -2147483648; + float min_val = -2147483647; float max_val = 2147483647; float r; @@ -71,7 +71,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; - float min_val = -2147483648; + float min_val = -2147483647; float max_val = 2147483647; float r; diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index d8493454b..d203546c6 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -22,10 +22,8 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; - //float min_val = -2147483647; - //float max_val = 2147483647; - float min_val = -2146400000; - float max_val = 2146400000; + float min_val = -2147483647; + float max_val = 2147483647; float r; __m128 vScalar = _mm_set_ps1(scalar); diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h index a865e0737..cde9240cc 100644 --- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h @@ -196,7 +196,10 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const #if LV_HAVE_SSE && LV_HAVE_32 static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - + + volk_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_bytes); + +#if 0 asm volatile ( " #pushl %%ebp\n\t" @@ -307,12 +310,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const } return; - - - - - - +#endif } #endif /*LV_HAVE_SSE*/ |