From 6ac7576d32b8be601843c871327856ebd2b965d6 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 22 Jan 2012 17:44:04 -0500 Subject: sched: first attempt at working with aligned data sets. A block has an indicator on whether or not the buffers are aligned; they can use this to determine which Volk function to use or if to use Volk at all. --- gnuradio-core/src/lib/runtime/gr_block.cc | 30 ++++++++++++++++ gnuradio-core/src/lib/runtime/gr_block.h | 28 +++++++++++++++ gnuradio-core/src/lib/runtime/gr_block_executor.cc | 41 ++++++++++++++++++++-- 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/gnuradio-core/src/lib/runtime/gr_block.cc b/gnuradio-core/src/lib/runtime/gr_block.cc index 9463869f5..78f77486b 100644 --- a/gnuradio-core/src/lib/runtime/gr_block.cc +++ b/gnuradio-core/src/lib/runtime/gr_block.cc @@ -34,6 +34,9 @@ gr_block::gr_block (const std::string &name, gr_io_signature_sptr output_signature) : gr_basic_block(name, input_signature, output_signature), d_output_multiple (1), + d_output_multiple_set(false), + d_unaligned(0), + d_is_unaligned(false), d_relative_rate (1.0), d_history(1), d_fixed_rate(false), @@ -75,9 +78,36 @@ gr_block::set_output_multiple (int multiple) if (multiple < 1) throw std::invalid_argument ("gr_block::set_output_multiple"); + d_output_multiple_set = true; d_output_multiple = multiple; } +void +gr_block::set_alignment (int multiple) +{ + if (multiple < 1) + throw std::invalid_argument ("gr_block::set_alignment_multiple"); + + d_output_multiple = multiple; +} + +void +gr_block::set_unaligned (int na) +{ + // unaligned value must be less than 0 and it doesn't make sense + // that it's larger than the alignment value. + if ((na < 0) || (na > d_output_multiple)) + throw std::invalid_argument ("gr_block::set_unaligned"); + + d_unaligned = na; +} + +void +gr_block::set_is_unaligned (bool u) +{ + d_is_unaligned = u; +} + void gr_block::set_relative_rate (double relative_rate) { diff --git a/gnuradio-core/src/lib/runtime/gr_block.h b/gnuradio-core/src/lib/runtime/gr_block.h index 86e0583e9..9171942e0 100644 --- a/gnuradio-core/src/lib/runtime/gr_block.h +++ b/gnuradio-core/src/lib/runtime/gr_block.h @@ -152,7 +152,32 @@ class GR_CORE_API gr_block : public gr_basic_block { */ void set_output_multiple (int multiple); int output_multiple () const { return d_output_multiple; } + bool output_multiple_set () const { return d_output_multiple_set; } + /*! + * \brief Constrains buffers to work on a set item alignment (for SIMD) + * + * set_alignment_multiple causes the scheduler to ensure that the noutput_items + * argument passed to forecast and general_work will be an integer multiple + * of \param multiple The default value is 1. + * + * This control is similar to the output_multiple setting, except + * that if the number of items passed to the block is less than the + * output_multiple, this value is ignored and the block can produce + * like normal. The d_unaligned value is set to the number of items + * the block is off by. In the next call to general_work, the + * noutput_items is set to d_unaligned or less until + * d_unaligned==0. The buffers are now aligned again and the aligned + * calls can be performed again. + */ + void set_alignment (int multiple); + int alignment () const { return d_output_multiple; } + + void set_unaligned (int na); + int unaligned () const { return d_unaligned; } + void set_is_unaligned (bool u); + bool is_unaligned () const { return d_is_unaligned; } + /*! * \brief Tell the scheduler \p how_many_items of input stream \p which_input were consumed. */ @@ -231,6 +256,9 @@ class GR_CORE_API gr_block : public gr_basic_block { private: int d_output_multiple; + bool d_output_multiple_set; + int d_unaligned; + bool d_is_unaligned; double d_relative_rate; // approx output_rate / input_rate gr_block_detail_sptr d_detail; // implementation details unsigned d_history; diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc index ef53baf78..1c52c0e13 100644 --- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc +++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc @@ -36,7 +36,7 @@ #include // must be defined to either 0 or 1 -#define ENABLE_LOGGING 0 +#define ENABLE_LOGGING 1 #if (ENABLE_LOGGING) #define LOG(x) do { x; } while(0) @@ -183,6 +183,7 @@ gr_block_executor::run_one_iteration() int noutput_items; int max_items_avail; int max_noutput_items = d_max_noutput_items; + int new_alignment; gr_block *m = d_block.get(); gr_block_detail *d = m->detail().get(); @@ -284,7 +285,7 @@ gr_block_executor::run_one_iteration() } // determine the minimum available output space - noutput_items = min_available_space (d, m->output_multiple ()); + noutput_items = min_available_space (d, m->output_multiple ()) - m->output_multiple(); if (ENABLE_LOGGING){ *d_log << " regular "; if (m->relative_rate() >= 1.0) @@ -307,7 +308,11 @@ gr_block_executor::run_one_iteration() // try to work it forward starting with max_items_avail. // We want to try to consume all the input we've got. int reqd_noutput_items = m->fixed_rate_ninput_to_noutput(max_items_avail); - reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple()); + + // only test this if we specifically set the output_multiple + if(m->output_multiple_set()) + reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple()); + if (reqd_noutput_items > 0 && reqd_noutput_items <= noutput_items) noutput_items = reqd_noutput_items; @@ -316,6 +321,30 @@ gr_block_executor::run_one_iteration() } noutput_items = std::min(noutput_items, max_noutput_items); + // Check if we're still unaligned; only use up items until we're + // aligned again. Otherwise, make sure we set the alignment + // requirement. + if(m->is_unaligned()) { + if(noutput_items >= m->unaligned()) { + noutput_items = round_up(noutput_items, m->alignment()) \ + - (m->alignment() - m->unaligned()); + new_alignment = 0; + } + else { + new_alignment = m->unaligned() - noutput_items; + } + } + else if(noutput_items < m->alignment()) { + //m->set_unaligned(m->alignment()); + new_alignment = m->alignment() - noutput_items; + m->set_unaligned(new_alignment); + m->set_is_unaligned(true); + } + else { + noutput_items = round_down(noutput_items, m->alignment()); + m->set_is_unaligned(false); + } + // ask the block how much input they need to produce noutput_items m->forecast (noutput_items, d_ninput_items_required); @@ -379,6 +408,12 @@ gr_block_executor::run_one_iteration() LOG(*d_log << " general_work: noutput_items = " << noutput_items << " result = " << n << std::endl); + // Adjust number of unaligned items left to process + if(m->is_unaligned()) { + m->set_unaligned(new_alignment); + m->set_is_unaligned(m->unaligned() != 0); + } + if(!propagate_tags(m->tag_propagation_policy(), d, d_start_nitems_read, m->relative_rate(), d_returned_tags)) -- cgit From 32cbee3e091021741bb2bcf0d4a8bddd6581c8ab Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 22 Jan 2012 17:46:11 -0500 Subject: core: link to Volk. --- gnuradio-core/src/lib/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gnuradio-core/src/lib/CMakeLists.txt b/gnuradio-core/src/lib/CMakeLists.txt index 52339fc6c..b2b63e270 100644 --- a/gnuradio-core/src/lib/CMakeLists.txt +++ b/gnuradio-core/src/lib/CMakeLists.txt @@ -68,6 +68,9 @@ if(LINUX) list(APPEND gnuradio_core_libs rt) endif() +# Link against libvolk +list(APPEND gnuradio_core_libs volk) + add_library(gnuradio-core SHARED ${gnuradio_core_sources}) target_link_libraries(gnuradio-core ${gnuradio_core_libs}) GR_LIBRARY_FOO(gnuradio-core RUNTIME_COMPONENT "core_runtime" DEVEL_COMPONENT "core_devel") -- cgit From accc35f380e56af156153cd01540a46c7eb98f12 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 22 Jan 2012 17:46:22 -0500 Subject: core: test case of using Volk to convert from float to short. --- gnuradio-core/src/lib/general/gr_float_to_short.cc | 24 ++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc index 084f76f9c..415ea6982 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc @@ -27,6 +27,9 @@ #include #include #include +#include + +#include gr_float_to_short_sptr gr_make_float_to_short () @@ -39,6 +42,9 @@ gr_float_to_short::gr_float_to_short () gr_make_io_signature (1, 1, sizeof (float)), gr_make_io_signature (1, 1, sizeof (short))) { + const int alignment_multiple = + volk_get_alignment() / sizeof(short); + set_alignment(alignment_multiple); } int @@ -49,8 +55,22 @@ gr_float_to_short::work (int noutput_items, const float *in = (const float *) input_items[0]; short *out = (short *) output_items[0]; - gri_float_to_short (in, out, noutput_items); - +#if 0 + if(is_unaligned()) { + float d_scale = 1.0; + //gri_float_to_short (in, out, noutput_items); + volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); + } + else { + float d_scale = 1.0; + volk_32f_s32f_convert_16i_a(out, in, d_scale, noutput_items); + } +#else + float d_scale = 1.0; + volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); + //gri_float_to_short (in, out, noutput_items); +#endif + return noutput_items; } -- cgit From 06f40e8dbef0b082e23703bc22775ec3458faf5b Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 22 Jan 2012 17:51:13 -0500 Subject: sched: forgot to turn debugging off. --- gnuradio-core/src/lib/runtime/gr_block_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc index 1c52c0e13..350518e4c 100644 --- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc +++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc @@ -36,7 +36,7 @@ #include // must be defined to either 0 or 1 -#define ENABLE_LOGGING 1 +#define ENABLE_LOGGING 0 #if (ENABLE_LOGGING) #define LOG(x) do { x; } while(0) -- cgit From 97f6e8152646c05b570671231d2bab428696aa89 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 23 Jan 2012 16:40:36 -0500 Subject: build: look for local volk headers instead of installed. --- gnuradio-core/src/lib/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/gnuradio-core/src/lib/CMakeLists.txt b/gnuradio-core/src/lib/CMakeLists.txt index b2b63e270..f9756feba 100644 --- a/gnuradio-core/src/lib/CMakeLists.txt +++ b/gnuradio-core/src/lib/CMakeLists.txt @@ -42,6 +42,7 @@ list(APPEND test_gnuradio_core_sources bug_work_around_6.cc) # Setup the include and linker paths ######################################################################## include_directories(${GNURADIO_CORE_INCLUDE_DIRS}) +include_directories(${VOLK_INCLUDE_DIRS}) include_directories(${Boost_INCLUDE_DIRS}) link_directories(${Boost_LIBRARY_DIRS}) -- cgit From 09be95bb8da5f41aff2fd2207fcdb6bb8f92c8cf Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 23 Jan 2012 16:41:32 -0500 Subject: sched: better comments. Handling of noutput_items adjustment done better and documented. --- gnuradio-core/src/lib/runtime/gr_block_executor.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc index 350518e4c..3191246a7 100644 --- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc +++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc @@ -36,7 +36,7 @@ #include // must be defined to either 0 or 1 -#define ENABLE_LOGGING 0 +#define ENABLE_LOGGING 1 #if (ENABLE_LOGGING) #define LOG(x) do { x; } while(0) @@ -83,6 +83,11 @@ min_available_space (gr_block_detail *d, int output_multiple) } return 0; } + else if (n > output_multiple) { + // adjust this or we often ask for too many, + // causing a re-calc for fewer items. + n = n-output_multiple; + } min_space = std::min (min_space, n); } return min_space; @@ -285,7 +290,7 @@ gr_block_executor::run_one_iteration() } // determine the minimum available output space - noutput_items = min_available_space (d, m->output_multiple ()) - m->output_multiple(); + noutput_items = min_available_space (d, m->output_multiple ()); if (ENABLE_LOGGING){ *d_log << " regular "; if (m->relative_rate() >= 1.0) @@ -321,10 +326,14 @@ gr_block_executor::run_one_iteration() } noutput_items = std::min(noutput_items, max_noutput_items); - // Check if we're still unaligned; only use up items until we're + // Check if we're still unaligned; use up items until we're // aligned again. Otherwise, make sure we set the alignment // requirement. if(m->is_unaligned()) { + // When unaligned, don't just set noutput_items to the remaining + // samples to meet alignment; this causes too much overhead in + // requiring a premature call back here. Set the maximum amount + // of samples to handle unalignment and get us back aligned. if(noutput_items >= m->unaligned()) { noutput_items = round_up(noutput_items, m->alignment()) \ - (m->alignment() - m->unaligned()); @@ -335,12 +344,14 @@ gr_block_executor::run_one_iteration() } } else if(noutput_items < m->alignment()) { - //m->set_unaligned(m->alignment()); + // if we don't have enough for an aligned call, keep track of + // misalignment, set unaligned flag, and proceed. new_alignment = m->alignment() - noutput_items; m->set_unaligned(new_alignment); m->set_is_unaligned(true); } else { + // enough to round down to the nearest alignment and process. noutput_items = round_down(noutput_items, m->alignment()); m->set_is_unaligned(false); } -- cgit From d825bb2bc1c7bc4105723ae5f699f283bc4f3c44 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 26 Jan 2012 17:53:49 -0500 Subject: volk: float_to_short now clips the values instead of wrapping around. --- volk/include/volk/volk_32f_s32f_convert_16i_a.h | 52 +++++++++++++++++++++---- volk/include/volk/volk_32f_s32f_convert_16i_u.h | 52 +++++++++++++++++++++---- 2 files changed, 90 insertions(+), 14 deletions(-) diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h index 0a2b4f0f2..10c921b08 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_a.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h @@ -21,17 +21,29 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int16_t* outputVectorPtr = outputVector; + + float min_val = -32768; + float max_val = 32767; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 inputVal1, inputVal2; __m128i intInputVal1, intInputVal2; + __m128 ret1, ret2; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); for(;number < eighthPoints; number++){ inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); - intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar)); - + // Scale and clip + ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); + + intInputVal1 = _mm_cvtps_epi32(ret1); + intInputVal2 = _mm_cvtps_epi32(ret2); + intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); @@ -40,7 +52,12 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const number = eighthPoints * 8; for(; number < num_points; number++){ - *outputVectorPtr++ = (int16_t)(*inputVectorPtr++ * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int16_t)(r); } } #endif /* LV_HAVE_SSE2 */ @@ -61,8 +78,15 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int16_t* outputVectorPtr = outputVector; + + float min_val = -32768; + float max_val = 32767; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; @@ -70,7 +94,8 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const ret = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - ret = _mm_mul_ps(ret, vScalar); + // Scale and clip + ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]); @@ -81,7 +106,12 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const number = quarterPoints * 4; for(; number < num_points; number++){ - *outputVectorPtr++ = (int16_t)(*inputVectorPtr++ * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int16_t)(r); } } #endif /* LV_HAVE_SSE */ @@ -98,9 +128,17 @@ static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, co int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; + float min_val = -32768; + float max_val = 32767; + float r; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++ * scalar)); + r = *inputVectorPtr++ * scalar; + if(r < min_val) + r = min_val; + else if(r > max_val) + r = max_val; + *outputVectorPtr++ = (int16_t)(r); } } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index dec3f1611..f339a7d10 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -21,17 +21,29 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int16_t* outputVectorPtr = outputVector; + + float min_val = -32768; + float max_val = 32767; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 inputVal1, inputVal2; __m128i intInputVal1, intInputVal2; + __m128 ret1, ret2; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); for(;number < eighthPoints; number++){ inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; - intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); - intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar)); - + // Scale and clip + ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); + + intInputVal1 = _mm_cvtps_epi32(ret1); + intInputVal2 = _mm_cvtps_epi32(ret2); + intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); @@ -40,7 +52,12 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const number = eighthPoints * 8; for(; number < num_points; number++){ - outputVector[number] = (int16_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int16_t)(r); } } #endif /* LV_HAVE_SSE2 */ @@ -62,8 +79,15 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int16_t* outputVectorPtr = outputVector; + + float min_val = -32768; + float max_val = 32767; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; @@ -71,7 +95,8 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const ret = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; - ret = _mm_mul_ps(ret, vScalar); + // Scale and clip + ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]); @@ -82,7 +107,12 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int16_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int16_t)(r); } } #endif /* LV_HAVE_SSE */ @@ -100,9 +130,17 @@ static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, co int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; + float min_val = -32768; + float max_val = 32767; + float r; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++ * scalar)); + r = *inputVectorPtr++ * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + *outputVectorPtr++ = (int16_t)(r); } } #endif /* LV_HAVE_GENERIC */ -- cgit From d8b02979cef097971bc0656b904f7b51d19b03c9 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 26 Jan 2012 20:07:26 -0500 Subject: volk: fix a warning. --- volk/include/volk/volk_64u_popcnt_a.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/volk/include/volk/volk_64u_popcnt_a.h b/volk/include/volk/volk_64u_popcnt_a.h index bdaa98643..4683f1e38 100644 --- a/volk/include/volk/volk_64u_popcnt_a.h +++ b/volk/include/volk/volk_64u_popcnt_a.h @@ -10,10 +10,11 @@ static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) { - const uint32_t* valueVector = (const uint32_t*)&value; + //const uint32_t* valueVector = (const uint32_t*)&value; // This is faster than a lookup table - uint32_t retVal = valueVector[0]; + //uint32_t retVal = valueVector[0]; + uint32_t retVal = (uint32_t)(value && 0x00000000FFFFFFFF); retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555); retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333); @@ -22,7 +23,8 @@ static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value retVal = (retVal + (retVal >> 16)) & 0x0000003F; uint64_t retVal64 = retVal; - retVal = valueVector[1]; + //retVal = valueVector[1]; + retVal = (uint32_t)((value && 0xFFFFFFFF00000000) >> 31); retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555); retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333); retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F; -- cgit From 42d9560a50bbbab143b48bda5a73a5379818ddbe Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 26 Jan 2012 20:07:51 -0500 Subject: volk: float_to_int and float_to_char updated to clip instead of wrap around. The float to int clips at smaller than 2^32 because of the limits of the float representation. --- volk/include/volk/volk_32f_s32f_convert_32i_a.h | 60 ++++++++++++++++++++++--- volk/include/volk/volk_32f_s32f_convert_32i_u.h | 45 ++++++++++++++++--- volk/include/volk/volk_32f_s32f_convert_8i_a.h | 53 ++++++++++++++++++---- volk/include/volk/volk_32f_s32f_convert_8i_u.h | 53 ++++++++++++++++++---- 4 files changed, 183 insertions(+), 28 deletions(-) diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h index aa370e614..15fa282fb 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h @@ -21,14 +21,22 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; + + float min_val = -2147483647; + float max_val = 2147483647; + float r; + __m256 vScalar = _mm256_set1_ps(scalar); __m256 inputVal1; __m256i intInputVal1; + __m256 vmin_val = _mm256_set1_ps(min_val); + __m256 vmax_val = _mm256_set1_ps(max_val); for(;number < eighthPoints; number++){ inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8; - intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar)); + inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + intInputVal1 = _mm256_cvtps_epi32(inputVal1); _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1); outputVectorPtr += 8; @@ -36,7 +44,12 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const number = eighthPoints * 8; for(; number < num_points; number++){ - outputVector[number] = (int32_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int32_t)(r); } } #endif /* LV_HAVE_AVX */ @@ -57,14 +70,22 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; + + float min_val = -2147483647; + float max_val = 2147483647; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 inputVal1; __m128i intInputVal1; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); for(;number < quarterPoints; number++){ inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); + inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + intInputVal1 = _mm_cvtps_epi32(inputVal1); _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); outputVectorPtr += 4; @@ -72,7 +93,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int32_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int32_t)(r); } } #endif /* LV_HAVE_SSE2 */ @@ -93,8 +119,15 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; + + float min_val = -2147483647; + float max_val = 2147483647; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; @@ -102,7 +135,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const ret = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - ret = _mm_mul_ps(ret, vScalar); + ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]); @@ -113,7 +146,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int32_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int32_t)(r); } } #endif /* LV_HAVE_SSE */ @@ -130,9 +168,17 @@ static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, co int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; + float min_val = -2147483647; + float max_val = 2147483647; + float r; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++ * scalar)); + r = *inputVectorPtr++ * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + *outputVectorPtr++ = (int32_t)(r); } } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index b4e954dc4..d8493454b 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -21,14 +21,24 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; + + //float min_val = -2147483647; + //float max_val = 2147483647; + float min_val = -2146400000; + float max_val = 2146400000; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 inputVal1; __m128i intInputVal1; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); for(;number < quarterPoints; number++){ inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; - intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); + inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + intInputVal1 = _mm_cvtps_epi32(inputVal1); _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); outputVectorPtr += 4; @@ -36,7 +46,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int32_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int32_t)(r); } } #endif /* LV_HAVE_SSE2 */ @@ -58,8 +73,15 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; + + float min_val = -2147483647; + float max_val = 2147483647; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; @@ -67,7 +89,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const ret = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; - ret = _mm_mul_ps(ret, vScalar); + ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]); @@ -78,7 +100,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int32_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int32_t)(r); } } #endif /* LV_HAVE_SSE */ @@ -96,9 +123,17 @@ static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, co int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; + float min_val = -2147483647; + float max_val = 2147483647; + float r; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++ * scalar)); + r = *inputVectorPtr++ * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + *outputVectorPtr++ = (int32_t)(r); } } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h index 8d87a07d7..05172171c 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_a.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h @@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f const float* inputVectorPtr = (const float*)inputVector; int8_t* outputVectorPtr = outputVector; + + float min_val = -128; + float max_val = 127; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 inputVal1, inputVal2, inputVal3, inputVal4; __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); for(;number < sixteenthPoints; number++){ inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; @@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); - intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar)); - intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar)); - intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar)); + inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); + inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val); + inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val); + + intInputVal1 = _mm_cvtps_epi32(inputVal1); + intInputVal2 = _mm_cvtps_epi32(inputVal2); + intInputVal3 = _mm_cvtps_epi32(inputVal3); + intInputVal4 = _mm_cvtps_epi32(inputVal4); intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4); @@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f number = sixteenthPoints * 16; for(; number < num_points; number++){ - outputVector[number] = (int8_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int8_t)(r); } } #endif /* LV_HAVE_SSE2 */ @@ -67,9 +84,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl const unsigned int quarterPoints = num_points / 4; const float* inputVectorPtr = (const float*)inputVector; + + float min_val = -128; + float max_val = 127; + float r; + int8_t* outputVectorPtr = outputVector; __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; @@ -77,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl ret = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - ret = _mm_mul_ps(ret, vScalar); + ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]); @@ -88,7 +112,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int8_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int8_t)(r); } } #endif /* LV_HAVE_SSE */ @@ -105,9 +134,17 @@ static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, cons int8_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; + float min_val = -128; + float max_val = 127; + float r; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ * scalar); + r = *inputVectorPtr++ * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + *outputVectorPtr++ = (int8_t)(r); } } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index 1c6bf87c9..12991e9c1 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f const float* inputVectorPtr = (const float*)inputVector; int8_t* outputVectorPtr = outputVector; + + float min_val = -128; + float max_val = 127; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 inputVal1, inputVal2, inputVal3, inputVal4; __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); for(;number < sixteenthPoints; number++){ inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; @@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; - intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); - intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar)); - intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar)); - intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar)); + inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); + inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); + inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val); + inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val); + + intInputVal1 = _mm_cvtps_epi32(inputVal1); + intInputVal2 = _mm_cvtps_epi32(inputVal2); + intInputVal3 = _mm_cvtps_epi32(inputVal3); + intInputVal4 = _mm_cvtps_epi32(inputVal4); intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4); @@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f number = sixteenthPoints * 16; for(; number < num_points; number++){ - outputVector[number] = (int8_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int16_t)(r); } } #endif /* LV_HAVE_SSE2 */ @@ -69,8 +86,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl const float* inputVectorPtr = (const float*)inputVector; int8_t* outputVectorPtr = outputVector; + + float min_val = -128; + float max_val = 127; + float r; + __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; + __m128 vmin_val = _mm_set_ps1(min_val); + __m128 vmax_val = _mm_set_ps1(max_val); __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; @@ -78,7 +102,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl ret = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; - ret = _mm_mul_ps(ret, vScalar); + ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]); @@ -89,7 +113,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl number = quarterPoints * 4; for(; number < num_points; number++){ - outputVector[number] = (int8_t)(inputVector[number] * scalar); + r = inputVector[number] * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + outputVector[number] = (int16_t)(r); } } #endif /* LV_HAVE_SSE */ @@ -107,9 +136,17 @@ static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, cons int8_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; + float min_val = -128; + float max_val = 127; + float r; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ * scalar)); + r = *inputVectorPtr++ * scalar; + if(r > max_val) + r = max_val; + else if(r < min_val) + r = min_val; + *outputVectorPtr++ = (int16_t)(r); } } #endif /* LV_HAVE_GENERIC */ -- cgit From 3737aa4b86af3b489ef3b91d8580bcd893295042 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 26 Jan 2012 20:20:49 -0500 Subject: core: moved float_to_X type converters over to use Volk calls. --- gnuradio-core/src/lib/general/gr_float_to_char.cc | 18 +++++++++++++++++- gnuradio-core/src/lib/general/gr_float_to_int.cc | 18 +++++++++++++++++- gnuradio-core/src/lib/general/gr_float_to_short.cc | 13 ++++--------- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc index 88b9d276e..2d7e21f2e 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_char.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc @@ -27,6 +27,7 @@ #include #include #include +#include gr_float_to_char_sptr gr_make_float_to_char () @@ -39,6 +40,9 @@ gr_float_to_char::gr_float_to_char () gr_make_io_signature (1, 1, sizeof (float)), gr_make_io_signature (1, 1, sizeof (char))) { + const int alignment_multiple = + volk_get_alignment() / sizeof(char); + set_alignment(alignment_multiple); } int @@ -46,10 +50,22 @@ gr_float_to_char::work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) { +#if 1 + float d_scale = 1.0; const float *in = (const float *) input_items[0]; - char *out = (char *) output_items[0]; + int8_t *out = (int8_t *) output_items[0]; + if(is_unaligned()) { + volk_32f_s32f_convert_8i_u(out, in, d_scale, noutput_items); + } + else { + volk_32f_s32f_convert_8i_a(out, in, d_scale, noutput_items); + } +#else + const float *in = (const float *) input_items[0]; + char *out = (char *) output_items[0]; gri_float_to_char (in, out, noutput_items); +#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc index 2349de8cb..2ca723c7c 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc @@ -27,6 +27,7 @@ #include #include #include +#include gr_float_to_int_sptr gr_make_float_to_int () @@ -39,6 +40,9 @@ gr_float_to_int::gr_float_to_int () gr_make_io_signature (1, 1, sizeof (float)), gr_make_io_signature (1, 1, sizeof (int))) { + const int alignment_multiple = + volk_get_alignment() / sizeof(int); + set_alignment(alignment_multiple); } int @@ -46,10 +50,22 @@ gr_float_to_int::work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) { +#if 1 + float d_scale = 1.0; const float *in = (const float *) input_items[0]; - int *out = (int *) output_items[0]; + int32_t *out = (int32_t *) output_items[0]; + if(is_unaligned()) { + volk_32f_s32f_convert_32i_u(out, in, d_scale, noutput_items); + } + else { + volk_32f_s32f_convert_32i_a(out, in, d_scale, noutput_items); + } +#else + const float *in = (const float *) input_items[0]; + int *out = (int *) output_items[0]; gri_float_to_int (in, out, noutput_items); +#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc index 415ea6982..6c4d031ac 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc @@ -29,8 +29,6 @@ #include #include -#include - gr_float_to_short_sptr gr_make_float_to_short () { @@ -55,20 +53,17 @@ gr_float_to_short::work (int noutput_items, const float *in = (const float *) input_items[0]; short *out = (short *) output_items[0]; -#if 0 +#if 1 + float d_scale = 1.0; + //volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); if(is_unaligned()) { - float d_scale = 1.0; - //gri_float_to_short (in, out, noutput_items); volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); } else { - float d_scale = 1.0; volk_32f_s32f_convert_16i_a(out, in, d_scale, noutput_items); } #else - float d_scale = 1.0; - volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); - //gri_float_to_short (in, out, noutput_items); + gri_float_to_short (in, out, noutput_items); #endif return noutput_items; -- cgit From 2a2663d625847217a237acba0229738a81003eef Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 26 Jan 2012 20:22:10 -0500 Subject: QA: type converter QA codes added and updated for Volk. Some small differences between Volk and non-Volk for rounding issues are made here. --- gnuradio-core/src/python/gnuradio/gr/Makefile.am | 2 + .../src/python/gnuradio/gr/qa_float_to_char.py | 64 +++++++++++++++++++ .../src/python/gnuradio/gr/qa_float_to_int.py | 12 ++-- .../src/python/gnuradio/gr/qa_float_to_short.py | 71 ++++++++++++++++++++++ .../src/python/gnuradio/gr/qa_float_to_uchar.py | 64 +++++++++++++++++++ 5 files changed, 209 insertions(+), 4 deletions(-) create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py mode change 100644 => 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py diff --git a/gnuradio-core/src/python/gnuradio/gr/Makefile.am b/gnuradio-core/src/python/gnuradio/gr/Makefile.am index f5af80c78..16dd14790 100644 --- a/gnuradio-core/src/python/gnuradio/gr/Makefile.am +++ b/gnuradio-core/src/python/gnuradio/gr/Makefile.am @@ -60,7 +60,9 @@ noinst_PYTHON = \ qa_fft.py \ qa_fft_filter.py \ qa_filter_delay_fc.py \ + qa_float_to_char.py \ qa_float_to_int.py \ + qa_float_to_short.py \ qa_fractional_interpolator.py \ qa_frequency_modulator.py \ qa_fsk_stuff.py \ diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py new file mode 100755 index 000000000..45df71d0a --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest +class test_float_to_char (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_001(self): + + src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3) + expected_result = [0, 1, 2, 3, 4, 5, 255, 254, 253] + src = gr.vector_source_f(src_data) + op = gr.float_to_char() + dst = gr.vector_sink_b() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + + def test_002(self): + + src_data = ( 126.0, 127.0, 128.0) + expected_result = [ 126, 127, 127 ] + + src = gr.vector_source_f(src_data) + op = gr.float_to_char() + # Note: vector_sink_b returns uchar + dst = gr.vector_sink_b() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_float_to_char, "test_float_to_char.xml") + diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py old mode 100644 new mode 100755 index 3e0b847a2..4cc3d0056 --- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py @@ -34,6 +34,10 @@ class test_float_to_int (gr_unittest.TestCase): src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5) expected_result = [int(round(s)) for s in src_data] + + ### Volk results + expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5] + src = gr.vector_source_f(src_data) op = gr.float_to_int() dst = gr.vector_sink_i() @@ -46,10 +50,10 @@ class test_float_to_int (gr_unittest.TestCase): def test_002(self): - src_data = ( 2147483647, 2147483648, 2200000000, - -2147483648, -2147483649, -2200000000) - expected_result = [ 2147483647, 2147483647, 2147483647, - -2147483647, -2147483647, -2147483647] + src_data = ( 2146400000, 2147483647, + -2146400000, -2147483648 ) + expected_result = [ 2146400000, 2146400000, + -2146400000, -2146400000 ] src = gr.vector_source_f(src_data) op = gr.float_to_int() dst = gr.vector_sink_i() diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py new file mode 100755 index 000000000..aa26668c8 --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest +import ctypes + +class test_float_to_short (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_001(self): + + src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5) + expected_result = [int(round(s)) for s in src_data] + + ### Volk results + expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5] + + src = gr.vector_source_f(src_data) + op = gr.float_to_short() + dst = gr.vector_sink_s() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + + def test_002(self): + + src_data = ( 32766, 32767, 32768, + -32767, -32768, -32769) + expected_result = [ 32766, 32767, 32767, + -32767, -32768, -32768 ] + + src = gr.vector_source_f(src_data) + op = gr.float_to_short() + dst = gr.vector_sink_s() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_float_to_short, "test_float_to_short.xml") + diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py new file mode 100755 index 000000000..0d54f45f3 --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest +import ctypes + +class test_float_to_uchar (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_001(self): + + src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5) + expected_result = [0, 1, 2, 3, 4, 6, 0, 0, 0, 0, 0] + src = gr.vector_source_f(src_data) + op = gr.float_to_uchar() + dst = gr.vector_sink_b() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + + def test_002(self): + + src_data = ( 254.0, 255.0, 256.0) + expected_result = [ 254, 255, 255 ] + src = gr.vector_source_f(src_data) + op = gr.float_to_uchar() + dst = gr.vector_sink_b() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_float_to_uchar, "test_float_to_uchar.xml") + -- cgit From 8f09eb204213492091fa8310b165c8ddb89b2801 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 12:49:00 -0500 Subject: volk: cleaned up float_to_X to be Volk-only and added vlen and scale to constructor to set the vector length and a free scaling with the data. --- gnuradio-core/src/lib/general/gr_float_to_char.cc | 42 +++++++++++--------- gnuradio-core/src/lib/general/gr_float_to_char.h | 16 ++++++-- gnuradio-core/src/lib/general/gr_float_to_char.i | 10 +++-- gnuradio-core/src/lib/general/gr_float_to_int.cc | 46 ++++++++++++---------- gnuradio-core/src/lib/general/gr_float_to_int.h | 16 ++++++-- gnuradio-core/src/lib/general/gr_float_to_int.i | 10 +++-- gnuradio-core/src/lib/general/gr_float_to_short.cc | 41 +++++++++++-------- gnuradio-core/src/lib/general/gr_float_to_short.h | 15 +++++-- gnuradio-core/src/lib/general/gr_float_to_short.i | 10 +++-- 9 files changed, 130 insertions(+), 76 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc index 2d7e21f2e..165172ac6 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_char.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004,2010 Free Software Foundation, Inc. + * Copyright 2004,2010,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,19 +30,32 @@ #include gr_float_to_char_sptr -gr_make_float_to_char () +gr_make_float_to_char (size_t vlen, float scale) { - return gnuradio::get_initial_sptr(new gr_float_to_char ()); + return gnuradio::get_initial_sptr(new gr_float_to_char (vlen, scale)); } -gr_float_to_char::gr_float_to_char () +gr_float_to_char::gr_float_to_char (size_t vlen, float scale) : gr_sync_block ("gr_float_to_char", - gr_make_io_signature (1, 1, sizeof (float)), - gr_make_io_signature (1, 1, sizeof (char))) + gr_make_io_signature (1, 1, sizeof (float)*vlen), + gr_make_io_signature (1, 1, sizeof (char)*vlen)), + d_vlen(vlen), d_scale(scale) { - const int alignment_multiple = - volk_get_alignment() / sizeof(char); - set_alignment(alignment_multiple); + const int alignment_multiple = + volk_get_alignment() / sizeof(char); + set_alignment(alignment_multiple); +} + +float +gr_float_to_char::scale() const +{ + return d_scale; +} + +void +gr_float_to_char::set_scale(float scale) +{ + d_scale = scale; } int @@ -50,22 +63,15 @@ gr_float_to_char::work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) { -#if 1 - float d_scale = 1.0; const float *in = (const float *) input_items[0]; int8_t *out = (int8_t *) output_items[0]; if(is_unaligned()) { - volk_32f_s32f_convert_8i_u(out, in, d_scale, noutput_items); + volk_32f_s32f_convert_8i_u(out, in, d_scale, d_vlen*noutput_items); } else { - volk_32f_s32f_convert_8i_a(out, in, d_scale, noutput_items); + volk_32f_s32f_convert_8i_a(out, in, d_scale, d_vlen*noutput_items); } -#else - const float *in = (const float *) input_items[0]; - char *out = (char *) output_items[0]; - gri_float_to_char (in, out, noutput_items); -#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.h b/gnuradio-core/src/lib/general/gr_float_to_char.h index 434e2e9d0..c88645a18 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_char.h +++ b/gnuradio-core/src/lib/general/gr_float_to_char.h @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004 Free Software Foundation, Inc. + * Copyright 2004,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,7 +30,7 @@ class gr_float_to_char; typedef boost::shared_ptr gr_float_to_char_sptr; GR_CORE_API gr_float_to_char_sptr -gr_make_float_to_char (); +gr_make_float_to_char (size_t vlen=1, float scale=1); /*! * \brief Convert stream of float to a stream of char @@ -39,10 +39,18 @@ gr_make_float_to_char (); class GR_CORE_API gr_float_to_char : public gr_sync_block { - friend GR_CORE_API gr_float_to_char_sptr gr_make_float_to_char (); - gr_float_to_char (); + private: + friend GR_CORE_API gr_float_to_char_sptr gr_make_float_to_char + (size_t vlen, float scale); + gr_float_to_char (size_t vlen, float scale); + + size_t d_vlen; + float d_scale; public: + float scale() const; + void set_scale(float scale); + virtual int work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.i b/gnuradio-core/src/lib/general/gr_float_to_char.i index 05b206554..05939f1b1 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_char.i +++ b/gnuradio-core/src/lib/general/gr_float_to_char.i @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004 Free Software Foundation, Inc. + * Copyright 2004,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -22,9 +22,13 @@ GR_SWIG_BLOCK_MAGIC(gr,float_to_char) -gr_float_to_char_sptr gr_make_float_to_char (); +gr_float_to_char_sptr +gr_make_float_to_char (size_t vlen=1, float scale=1); class gr_float_to_char : public gr_sync_block { - gr_float_to_char (); +public: + float scale() const; + void set_scale(float scale); + gr_float_to_char (size_t vlen, float scale); }; diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc index 2ca723c7c..c4d9991af 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2011 Free Software Foundation, Inc. + * Copyright 2011,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,42 +30,48 @@ #include gr_float_to_int_sptr -gr_make_float_to_int () +gr_make_float_to_int (size_t vlen, float scale) { - return gnuradio::get_initial_sptr(new gr_float_to_int ()); + return gnuradio::get_initial_sptr(new gr_float_to_int (vlen, scale)); } -gr_float_to_int::gr_float_to_int () +gr_float_to_int::gr_float_to_int (size_t vlen, float scale) : gr_sync_block ("gr_float_to_int", - gr_make_io_signature (1, 1, sizeof (float)), - gr_make_io_signature (1, 1, sizeof (int))) + gr_make_io_signature (1, 1, sizeof (float)*vlen), + gr_make_io_signature (1, 1, sizeof (int)*vlen)), + d_vlen(vlen), d_scale(scale) { - const int alignment_multiple = - volk_get_alignment() / sizeof(int); - set_alignment(alignment_multiple); + const int alignment_multiple = + volk_get_alignment() / sizeof(int); + set_alignment(alignment_multiple); +} + +float +gr_float_to_int::scale() const +{ + return d_scale; +} + +void +gr_float_to_int::set_scale(float scale) +{ + d_scale = scale; } int gr_float_to_int::work (int noutput_items, - gr_vector_const_void_star &input_items, - gr_vector_void_star &output_items) + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) { -#if 1 - float d_scale = 1.0; const float *in = (const float *) input_items[0]; int32_t *out = (int32_t *) output_items[0]; if(is_unaligned()) { - volk_32f_s32f_convert_32i_u(out, in, d_scale, noutput_items); + volk_32f_s32f_convert_32i_u(out, in, d_scale, d_vlen*noutput_items); } else { - volk_32f_s32f_convert_32i_a(out, in, d_scale, noutput_items); + volk_32f_s32f_convert_32i_a(out, in, d_scale, d_vlen*noutput_items); } -#else - const float *in = (const float *) input_items[0]; - int *out = (int *) output_items[0]; - gri_float_to_int (in, out, noutput_items); -#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.h b/gnuradio-core/src/lib/general/gr_float_to_int.h index 3324ed110..0b42c0aab 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.h +++ b/gnuradio-core/src/lib/general/gr_float_to_int.h @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2011 Free Software Foundation, Inc. + * Copyright 2011,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,7 +30,7 @@ class gr_float_to_int; typedef boost::shared_ptr gr_float_to_int_sptr; GR_CORE_API gr_float_to_int_sptr -gr_make_float_to_int (); +gr_make_float_to_int (size_t vlen=1, float scale=1); /*! * \brief Convert stream of float to a stream of short @@ -39,10 +39,18 @@ gr_make_float_to_int (); class GR_CORE_API gr_float_to_int : public gr_sync_block { - friend GR_CORE_API gr_float_to_int_sptr gr_make_float_to_int (); - gr_float_to_int (); + private: + friend GR_CORE_API + gr_float_to_int_sptr gr_make_float_to_int (size_t vlen, float scale); + gr_float_to_int (size_t vlen, float scale); + + size_t d_vlen; + float d_scale; public: + float scale() const; + void set_scale(float scale); + virtual int work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.i b/gnuradio-core/src/lib/general/gr_float_to_int.i index 4ab04cbf2..a0b52a3c1 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.i +++ b/gnuradio-core/src/lib/general/gr_float_to_int.i @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2011 Free Software Foundation, Inc. + * Copyright 2011,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -22,9 +22,13 @@ GR_SWIG_BLOCK_MAGIC(gr,float_to_int) -gr_float_to_int_sptr gr_make_float_to_int (); +gr_float_to_int_sptr +gr_make_float_to_int (size_t vlen=1, float scale=1); class gr_float_to_int : public gr_sync_block { - gr_float_to_int (); +public: + float scale() const; + void set_scale(float scale); + gr_float_to_int (size_t vlen, float scale); }; diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc index 6c4d031ac..e6ac5c184 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004,2010 Free Software Foundation, Inc. + * Copyright 2004,2010,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,19 +30,32 @@ #include gr_float_to_short_sptr -gr_make_float_to_short () +gr_make_float_to_short (size_t vlen, float scale) { - return gnuradio::get_initial_sptr(new gr_float_to_short ()); + return gnuradio::get_initial_sptr(new gr_float_to_short (vlen, scale)); } -gr_float_to_short::gr_float_to_short () +gr_float_to_short::gr_float_to_short (size_t vlen, float scale) : gr_sync_block ("gr_float_to_short", - gr_make_io_signature (1, 1, sizeof (float)), - gr_make_io_signature (1, 1, sizeof (short))) + gr_make_io_signature (1, 1, sizeof (float)*vlen), + gr_make_io_signature (1, 1, sizeof (short)*vlen)), + d_vlen(vlen), d_scale(scale) { - const int alignment_multiple = - volk_get_alignment() / sizeof(short); - set_alignment(alignment_multiple); + const int alignment_multiple = + volk_get_alignment() / sizeof(short); + set_alignment(alignment_multiple); +} + +float +gr_float_to_short::scale() const +{ + return d_scale; +} + +void +gr_float_to_short::set_scale(float scale) +{ + d_scale = scale; } int @@ -53,18 +66,12 @@ gr_float_to_short::work (int noutput_items, const float *in = (const float *) input_items[0]; short *out = (short *) output_items[0]; -#if 1 - float d_scale = 1.0; - //volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); if(is_unaligned()) { - volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items); + volk_32f_s32f_convert_16i_u(out, in, d_scale, d_vlen*noutput_items); } else { - volk_32f_s32f_convert_16i_a(out, in, d_scale, noutput_items); + volk_32f_s32f_convert_16i_a(out, in, d_scale, d_vlen*noutput_items); } -#else - gri_float_to_short (in, out, noutput_items); -#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.h b/gnuradio-core/src/lib/general/gr_float_to_short.h index 010d61141..93e441f41 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.h +++ b/gnuradio-core/src/lib/general/gr_float_to_short.h @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004 Free Software Foundation, Inc. + * Copyright 2004,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,7 +30,7 @@ class gr_float_to_short; typedef boost::shared_ptr gr_float_to_short_sptr; GR_CORE_API gr_float_to_short_sptr -gr_make_float_to_short (); +gr_make_float_to_short (size_t vlen=1, float scale=1); /*! * \brief Convert stream of float to a stream of short @@ -39,10 +39,17 @@ gr_make_float_to_short (); class GR_CORE_API gr_float_to_short : public gr_sync_block { - friend GR_CORE_API gr_float_to_short_sptr gr_make_float_to_short (); - gr_float_to_short (); + friend GR_CORE_API + gr_float_to_short_sptr gr_make_float_to_short (size_t vlen, float scale); + gr_float_to_short (size_t vlen, float scale); + + size_t d_vlen; + float d_scale; public: + float scale() const; + void set_scale(float scale); + virtual int work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.i b/gnuradio-core/src/lib/general/gr_float_to_short.i index ad059c453..8da733054 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.i +++ b/gnuradio-core/src/lib/general/gr_float_to_short.i @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004 Free Software Foundation, Inc. + * Copyright 2004,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -22,9 +22,13 @@ GR_SWIG_BLOCK_MAGIC(gr,float_to_short) -gr_float_to_short_sptr gr_make_float_to_short (); +gr_float_to_short_sptr +gr_make_float_to_short (size_t vlen=1, float scale=1); class gr_float_to_short : public gr_sync_block { - gr_float_to_short (); +public: + float scale() const; + void set_scale(float scale); + gr_float_to_short (size_t vlen, float scale); }; -- cgit From 9009ceb57a9eef4623216aa945cb0c4173c0ee2e Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 15:08:26 -0500 Subject: core: minor tweaks to float_to_X --- gnuradio-core/src/lib/general/gr_float_to_char.cc | 1 - gnuradio-core/src/lib/general/gr_float_to_char.i | 1 - gnuradio-core/src/lib/general/gr_float_to_int.cc | 1 - gnuradio-core/src/lib/general/gr_float_to_int.i | 1 - gnuradio-core/src/lib/general/gr_float_to_short.cc | 1 - gnuradio-core/src/lib/general/gr_float_to_short.i | 1 - 6 files changed, 6 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc index 165172ac6..14635ff71 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_char.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc @@ -26,7 +26,6 @@ #include #include -#include #include gr_float_to_char_sptr diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.i b/gnuradio-core/src/lib/general/gr_float_to_char.i index 05939f1b1..a1c88750f 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_char.i +++ b/gnuradio-core/src/lib/general/gr_float_to_char.i @@ -30,5 +30,4 @@ class gr_float_to_char : public gr_sync_block public: float scale() const; void set_scale(float scale); - gr_float_to_char (size_t vlen, float scale); }; diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc index c4d9991af..28214538f 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc @@ -26,7 +26,6 @@ #include #include -#include #include gr_float_to_int_sptr diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.i b/gnuradio-core/src/lib/general/gr_float_to_int.i index a0b52a3c1..6e71f54a9 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.i +++ b/gnuradio-core/src/lib/general/gr_float_to_int.i @@ -30,5 +30,4 @@ class gr_float_to_int : public gr_sync_block public: float scale() const; void set_scale(float scale); - gr_float_to_int (size_t vlen, float scale); }; diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc index e6ac5c184..188bfdae3 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc @@ -26,7 +26,6 @@ #include #include -#include #include gr_float_to_short_sptr diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.i b/gnuradio-core/src/lib/general/gr_float_to_short.i index 8da733054..072da5213 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_short.i +++ b/gnuradio-core/src/lib/general/gr_float_to_short.i @@ -30,5 +30,4 @@ class gr_float_to_short : public gr_sync_block public: float scale() const; void set_scale(float scale); - gr_float_to_short (size_t vlen, float scale); }; -- cgit From c05a75d0e1d28fe2c229a9a93ef42828929999b4 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 15:09:44 -0500 Subject: core: short_to_X: adding short_to_char block using Volk; made Volk version of short_to_float. --- gnuradio-core/src/lib/general/gr_short_to_char.cc | 67 ++++++++++++++++++++++ gnuradio-core/src/lib/general/gr_short_to_char.h | 56 ++++++++++++++++++ gnuradio-core/src/lib/general/gr_short_to_char.i | 30 ++++++++++ gnuradio-core/src/lib/general/gr_short_to_float.cc | 41 ++++++++++--- gnuradio-core/src/lib/general/gr_short_to_float.h | 18 ++++-- gnuradio-core/src/lib/general/gr_short_to_float.i | 9 ++- 6 files changed, 206 insertions(+), 15 deletions(-) create mode 100644 gnuradio-core/src/lib/general/gr_short_to_char.cc create mode 100644 gnuradio-core/src/lib/general/gr_short_to_char.h create mode 100644 gnuradio-core/src/lib/general/gr_short_to_char.i diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.cc b/gnuradio-core/src/lib/general/gr_short_to_char.cc new file mode 100644 index 000000000..a3c096e6d --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_short_to_char.cc @@ -0,0 +1,67 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_short_to_char_sptr +gr_make_short_to_char (size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_short_to_char (vlen)); +} + +gr_short_to_char::gr_short_to_char (size_t vlen) + : gr_sync_block ("gr_short_to_char", + gr_make_io_signature (1, 1, sizeof (short)*vlen), + gr_make_io_signature (1, 1, sizeof (char)*vlen)), + d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(char); + set_alignment(alignment_multiple); +} + +int +gr_short_to_char::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + const int16_t *in = (const int16_t *) input_items[0]; + int8_t *out = (int8_t *) output_items[0]; + + if(is_unaligned()) { + volk_16i_convert_8i_u(out, in, d_vlen*noutput_items); + } + else { + volk_16i_convert_8i_a(out, in, d_vlen*noutput_items); + } + + return noutput_items; +} + + + diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.h b/gnuradio-core/src/lib/general/gr_short_to_char.h new file mode 100644 index 000000000..9682d86ec --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_short_to_char.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_SHORT_TO_CHAR_H +#define INCLUDED_GR_SHORT_TO_CHAR_H + +#include +#include + +class gr_short_to_char; +typedef boost::shared_ptr gr_short_to_char_sptr; + +GR_CORE_API gr_short_to_char_sptr +gr_make_short_to_char (size_t vlen=1); + +/*! + * \brief Convert stream of short to a stream of float + * \ingroup converter_blk + */ + +class GR_CORE_API gr_short_to_char : public gr_sync_block +{ + private: + friend GR_CORE_API gr_short_to_char_sptr + gr_make_short_to_char (size_t vlen); + gr_short_to_char (size_t vlen); + + size_t d_vlen; + + public: + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_SHORT_TO_CHAR_H */ diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.i b/gnuradio-core/src/lib/general/gr_short_to_char.i new file mode 100644 index 000000000..8fa453a06 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_short_to_char.i @@ -0,0 +1,30 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,short_to_char) + +gr_short_to_char_sptr gr_make_short_to_char (size_t vlen=1); + +class gr_short_to_char : public gr_sync_block +{ + +}; diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc index 7b80953ac..d11618414 100644 --- a/gnuradio-core/src/lib/general/gr_short_to_float.cc +++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004,2010 Free Software Foundation, Inc. + * Copyright 2004,2010,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -27,18 +27,35 @@ #include #include #include +#include gr_short_to_float_sptr -gr_make_short_to_float () +gr_make_short_to_float (size_t vlen, float scale) { - return gnuradio::get_initial_sptr(new gr_short_to_float ()); + return gnuradio::get_initial_sptr(new gr_short_to_float (vlen, scale)); } -gr_short_to_float::gr_short_to_float () +gr_short_to_float::gr_short_to_float (size_t vlen, float scale) : gr_sync_block ("gr_short_to_float", - gr_make_io_signature (1, 1, sizeof (short)), - gr_make_io_signature (1, 1, sizeof (float))) + gr_make_io_signature (1, 1, sizeof (short)*vlen), + gr_make_io_signature (1, 1, sizeof (float)*vlen)), + d_vlen(vlen), d_scale(scale) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); +} + +float +gr_short_to_float::scale() const +{ + return d_scale; +} + +void +gr_short_to_float::set_scale(float scale) +{ + d_scale = scale; } int @@ -49,7 +66,17 @@ gr_short_to_float::work (int noutput_items, const short *in = (const short *) input_items[0]; float *out = (float *) output_items[0]; - gri_short_to_float (in, out, noutput_items); +#if 0 + if(is_unaligned()) { + volk_16i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items); + } + else { + float d_scale = 1.0; + volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items); + } +#else + gri_short_to_float (in, out, d_vlen*noutput_items); +#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.h b/gnuradio-core/src/lib/general/gr_short_to_float.h index b40c966ea..efdc81ecd 100644 --- a/gnuradio-core/src/lib/general/gr_short_to_float.h +++ b/gnuradio-core/src/lib/general/gr_short_to_float.h @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004 Free Software Foundation, Inc. + * Copyright 2004,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,7 +30,7 @@ class gr_short_to_float; typedef boost::shared_ptr gr_short_to_float_sptr; GR_CORE_API gr_short_to_float_sptr -gr_make_short_to_float (); +gr_make_short_to_float (size_t vlen=1, float scale=1); /*! * \brief Convert stream of short to a stream of float @@ -39,10 +39,18 @@ gr_make_short_to_float (); class GR_CORE_API gr_short_to_float : public gr_sync_block { - friend GR_CORE_API gr_short_to_float_sptr gr_make_short_to_float (); - gr_short_to_float (); - + private: + friend GR_CORE_API gr_short_to_float_sptr + gr_make_short_to_float (size_t vlen, float scale); + gr_short_to_float (size_t vlen, float scale); + + size_t d_vlen; + float d_scale; + public: + float scale() const; + void set_scale(float scale); + virtual int work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.i b/gnuradio-core/src/lib/general/gr_short_to_float.i index 56759df29..229618890 100644 --- a/gnuradio-core/src/lib/general/gr_short_to_float.i +++ b/gnuradio-core/src/lib/general/gr_short_to_float.i @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004 Free Software Foundation, Inc. + * Copyright 2004,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -22,9 +22,12 @@ GR_SWIG_BLOCK_MAGIC(gr,short_to_float) -gr_short_to_float_sptr gr_make_short_to_float (); +gr_short_to_float_sptr +gr_make_short_to_float (size_t vlen=1, float scale=1); class gr_short_to_float : public gr_sync_block { - gr_short_to_float (); +public: + float scale() const; + void set_scale(float scale); }; -- cgit From 7943c197eccdb16e6c42af5544b9cb34a089b360 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 15:11:17 -0500 Subject: core: cleanup short_to_float. --- gnuradio-core/src/lib/general/gr_short_to_float.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc index d11618414..9f71f52ed 100644 --- a/gnuradio-core/src/lib/general/gr_short_to_float.cc +++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc @@ -26,7 +26,6 @@ #include #include -#include #include gr_short_to_float_sptr @@ -66,17 +65,12 @@ gr_short_to_float::work (int noutput_items, const short *in = (const short *) input_items[0]; float *out = (float *) output_items[0]; -#if 0 if(is_unaligned()) { volk_16i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items); } else { - float d_scale = 1.0; volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items); } -#else - gri_short_to_float (in, out, d_vlen*noutput_items); -#endif return noutput_items; } -- cgit From c9a420645f16f5c28fdcf05c1c09b9a91b95e640 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 15:11:50 -0500 Subject: core: QA codes for float_to_X and short_to_X (note: float_to_int needs work). --- .../src/python/gnuradio/gr/qa_float_to_char.py | 20 ++++++- .../src/python/gnuradio/gr/qa_float_to_int.py | 22 ++++++- .../src/python/gnuradio/gr/qa_float_to_short.py | 20 ++++++- .../src/python/gnuradio/gr/qa_short_to_char.py | 69 +++++++++++++++++++++ .../src/python/gnuradio/gr/qa_short_to_float.py | 70 ++++++++++++++++++++++ 5 files changed, 197 insertions(+), 4 deletions(-) create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py index 45df71d0a..ecdd36228 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2011 Free Software Foundation, Inc. +# Copyright 2011,2012 Free Software Foundation, Inc. # # This file is part of GNU Radio # @@ -59,6 +59,24 @@ class test_float_to_char (gr_unittest.TestCase): self.assertEqual(expected_result, result_data) + def test_003(self): + + scale = 2 + vlen = 3 + src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3) + expected_result = [0, 2, 4, 6, 8, 11, 254, 252, 250] + src = gr.vector_source_f(src_data) + s2v = gr.stream_to_vector(gr.sizeof_float, vlen) + op = gr.float_to_char(vlen, scale) + v2s = gr.vector_to_stream(gr.sizeof_char, vlen) + dst = gr.vector_sink_b() + + self.tb.connect(src, s2v, op, v2s, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + if __name__ == '__main__': gr_unittest.run(test_float_to_char, "test_float_to_char.xml") diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py index 4cc3d0056..559f90f05 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py @@ -33,7 +33,6 @@ class test_float_to_int (gr_unittest.TestCase): def test_001(self): src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5) - expected_result = [int(round(s)) for s in src_data] ### Volk results expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5] @@ -53,7 +52,7 @@ class test_float_to_int (gr_unittest.TestCase): src_data = ( 2146400000, 2147483647, -2146400000, -2147483648 ) expected_result = [ 2146400000, 2146400000, - -2146400000, -2146400000 ] + -2146400000, -2147483648 ] src = gr.vector_source_f(src_data) op = gr.float_to_int() dst = gr.vector_sink_i() @@ -64,6 +63,25 @@ class test_float_to_int (gr_unittest.TestCase): self.assertEqual(expected_result, result_data) + + def test_003(self): + + scale = 2 + vlen = 3 + src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3) + expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6,] + src = gr.vector_source_f(src_data) + s2v = gr.stream_to_vector(gr.sizeof_float, vlen) + op = gr.float_to_int(vlen, scale) + v2s = gr.vector_to_stream(gr.sizeof_int, vlen) + dst = gr.vector_sink_i() + + self.tb.connect(src, s2v, op, v2s, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + if __name__ == '__main__': gr_unittest.run(test_float_to_int, "test_float_to_int.xml") diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py index aa26668c8..926f1c08b 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2011 Free Software Foundation, Inc. +# Copyright 2011,2012 Free Software Foundation, Inc. # # This file is part of GNU Radio # @@ -66,6 +66,24 @@ class test_float_to_short (gr_unittest.TestCase): self.assertEqual(expected_result, result_data) + def test_003(self): + + scale = 2 + vlen = 3 + src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3) + expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6] + src = gr.vector_source_f(src_data) + s2v = gr.stream_to_vector(gr.sizeof_float, vlen) + op = gr.float_to_short(vlen, scale) + v2s = gr.vector_to_stream(gr.sizeof_short, vlen) + dst = gr.vector_sink_s() + + self.tb.connect(src, s2v, op, v2s, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + if __name__ == '__main__': gr_unittest.run(test_float_to_short, "test_float_to_short.xml") diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py new file mode 100755 index 000000000..6a95fa01d --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# +# Copyright 2011,2012 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest +import ctypes + +class test_short_to_char (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_001(self): + + src_data = range(0, 32767, 32767/127) + src_data = [int(s) for s in src_data] + expected_result = range(0, 128) + src = gr.vector_source_s(src_data) + op = gr.short_to_char() + dst = gr.vector_sink_b() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + + def test_002(self): + + vlen = 3 + src_data = range(0, 32400, 32767/127) + src_data = [int(s) for s in src_data] + expected_result = range(0, 126) + src = gr.vector_source_s(src_data) + s2v = gr.stream_to_vector(gr.sizeof_short, vlen) + op = gr.short_to_char(vlen) + v2s = gr.vector_to_stream(gr.sizeof_char, vlen) + dst = gr.vector_sink_b() + + self.tb.connect(src, s2v, op, v2s, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_short_to_char, "test_short_to_char.xml") + diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py new file mode 100755 index 000000000..8f331b495 --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# +# Copyright 2011,2012 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest +import ctypes + +class test_short_to_float (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_001(self): + + src_data = (0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5) + expected_result = [ 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, + -1.0, -2.0, -3.0, -4.0, -5.0] + + src = gr.vector_source_s(src_data) + op = gr.short_to_float() + dst = gr.vector_sink_f() + + self.tb.connect(src, op, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + + def test_002(self): + + vlen = 3 + src_data = (0, 1, 2, 3, 4, 5, -1, -2, -3) + expected_result = [0.0, 1.0, 2.0, 3.0, 4.0, + 5.0, -1.0, -2.0, -3.0] + src = gr.vector_source_s(src_data) + s2v = gr.stream_to_vector(gr.sizeof_short, vlen) + op = gr.short_to_float(vlen) + v2s = gr.vector_to_stream(gr.sizeof_float, vlen) + dst = gr.vector_sink_f() + + self.tb.connect(src, s2v, op, v2s, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_short_to_float, "test_short_to_float.xml") + -- cgit From f63927a46517ea9c7e914feb9177896219b33a0d Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 19:25:04 -0500 Subject: core: more conversion work to Volk for type converters. --- gnuradio-core/src/lib/general/gr_char_to_float.cc | 39 ++++++++++++++++------ gnuradio-core/src/lib/general/gr_char_to_float.h | 14 ++++++-- gnuradio-core/src/lib/general/gr_char_to_float.i | 7 ++-- gnuradio-core/src/lib/general/gr_int_to_float.cc | 30 ++++++++++++----- gnuradio-core/src/lib/general/gr_int_to_float.h | 16 ++++++--- gnuradio-core/src/lib/general/gr_int_to_float.i | 8 +++-- gnuradio-core/src/lib/general/gr_short_to_char.i | 3 +- gnuradio-core/src/lib/general/gr_short_to_float.cc | 1 - .../src/python/gnuradio/gr/qa_int_to_float.py | 20 +++++++++++ 9 files changed, 105 insertions(+), 33 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.cc b/gnuradio-core/src/lib/general/gr_char_to_float.cc index e68f8d208..ffe8ee4a1 100644 --- a/gnuradio-core/src/lib/general/gr_char_to_float.cc +++ b/gnuradio-core/src/lib/general/gr_char_to_float.cc @@ -26,30 +26,47 @@ #include #include -#include +#include gr_char_to_float_sptr -gr_make_char_to_float () +gr_make_char_to_float (size_t vlen, float scale) { - return gnuradio::get_initial_sptr(new gr_char_to_float ()); + return gnuradio::get_initial_sptr(new gr_char_to_float (vlen, scale)); } -gr_char_to_float::gr_char_to_float () +gr_char_to_float::gr_char_to_float (size_t vlen, float scale) : gr_sync_block ("gr_char_to_float", - gr_make_io_signature (1, 1, sizeof (char)), - gr_make_io_signature (1, 1, sizeof (float))) + gr_make_io_signature (1, 1, sizeof (char)*vlen), + gr_make_io_signature (1, 1, sizeof (float)*vlen)), + d_vlen(vlen), d_scale(scale) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); +} + +float +gr_char_to_float::scale() const +{ + return d_scale; +} + +void +gr_char_to_float::set_scale(float scale) +{ + d_scale = scale; } int gr_char_to_float::work (int noutput_items, - gr_vector_const_void_star &input_items, - gr_vector_void_star &output_items) + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) { - const char *in = (const char *) input_items[0]; + const int8_t *in = (const int8_t *) input_items[0]; float *out = (float *) output_items[0]; - gri_char_to_float (in, out, noutput_items); - + // Note: the unaligned benchmarked much faster than the aligned + volk_8i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items); + return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.h b/gnuradio-core/src/lib/general/gr_char_to_float.h index b20d2066f..4ad8e59a8 100644 --- a/gnuradio-core/src/lib/general/gr_char_to_float.h +++ b/gnuradio-core/src/lib/general/gr_char_to_float.h @@ -30,7 +30,7 @@ class gr_char_to_float; typedef boost::shared_ptr gr_char_to_float_sptr; GR_CORE_API gr_char_to_float_sptr -gr_make_char_to_float (); +gr_make_char_to_float (size_t vlen=1, float scale=1); /*! * \brief Convert stream of chars to a stream of float @@ -39,10 +39,18 @@ gr_make_char_to_float (); class GR_CORE_API gr_char_to_float : public gr_sync_block { - friend GR_CORE_API gr_char_to_float_sptr gr_make_char_to_float (); - gr_char_to_float (); + private: + friend GR_CORE_API gr_char_to_float_sptr + gr_make_char_to_float (size_t vlen, float scale); + gr_char_to_float (size_t vlen, float scale); + + size_t d_vlen; + float d_scale; public: + float scale() const; + void set_scale(float scale); + virtual int work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.i b/gnuradio-core/src/lib/general/gr_char_to_float.i index 0403b621d..65ad861f2 100644 --- a/gnuradio-core/src/lib/general/gr_char_to_float.i +++ b/gnuradio-core/src/lib/general/gr_char_to_float.i @@ -22,9 +22,12 @@ GR_SWIG_BLOCK_MAGIC(gr,char_to_float) -gr_char_to_float_sptr gr_make_char_to_float (); +gr_char_to_float_sptr +gr_make_char_to_float (size_t vlen=1, float scale=1); class gr_char_to_float : public gr_sync_block { - gr_char_to_float (); +public: + float scale() const; + void set_scale(float scale); }; diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.cc b/gnuradio-core/src/lib/general/gr_int_to_float.cc index 29ca22add..dca0e1b89 100644 --- a/gnuradio-core/src/lib/general/gr_int_to_float.cc +++ b/gnuradio-core/src/lib/general/gr_int_to_float.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2011 Free Software Foundation, Inc. + * Copyright 2011,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -27,18 +27,23 @@ #include #include #include +#include gr_int_to_float_sptr -gr_make_int_to_float () +gr_make_int_to_float (size_t vlen, float scale) { - return gnuradio::get_initial_sptr(new gr_int_to_float ()); + return gnuradio::get_initial_sptr(new gr_int_to_float (vlen, scale)); } -gr_int_to_float::gr_int_to_float () +gr_int_to_float::gr_int_to_float (size_t vlen, float scale) : gr_sync_block ("gr_int_to_float", - gr_make_io_signature (1, 1, sizeof (int32_t)), - gr_make_io_signature (1, 1, sizeof (float))) + gr_make_io_signature (1, 1, sizeof (int32_t)*vlen), + gr_make_io_signature (1, 1, sizeof (float)*vlen)), + d_vlen(vlen), d_scale(scale) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -48,8 +53,17 @@ gr_int_to_float::work (int noutput_items, { const int32_t *in = (const int32_t *) input_items[0]; float *out = (float *) output_items[0]; - - gri_int_to_float(in, out, noutput_items); + +#if 1 + if(is_unaligned()) { + volk_32i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items); + } + else { + volk_32i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items); + } +#else + gri_int_to_float(in, out, d_vlen*noutput_items); +#endif return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.h b/gnuradio-core/src/lib/general/gr_int_to_float.h index 9af381ba9..af6488a50 100644 --- a/gnuradio-core/src/lib/general/gr_int_to_float.h +++ b/gnuradio-core/src/lib/general/gr_int_to_float.h @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2011 Free Software Foundation, Inc. + * Copyright 2011,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -30,7 +30,7 @@ class gr_int_to_float; typedef boost::shared_ptr gr_int_to_float_sptr; GR_CORE_API gr_int_to_float_sptr -gr_make_int_to_float (); +gr_make_int_to_float (size_t vlen=1, float scale=1); /*! * \brief Convert stream of int to a stream of float @@ -39,10 +39,18 @@ gr_make_int_to_float (); class GR_CORE_API gr_int_to_float : public gr_sync_block { - friend GR_CORE_API gr_int_to_float_sptr gr_make_int_to_float (); - gr_int_to_float (); + private: + friend GR_CORE_API gr_int_to_float_sptr + gr_make_int_to_float (size_t vlen, float scale); + gr_int_to_float (size_t vlen, float scale); + + size_t d_vlen; + float d_scale; public: + float scale() const; + void set_scale(float scale); + virtual int work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.i b/gnuradio-core/src/lib/general/gr_int_to_float.i index 8cb9e35b5..c1f25e37b 100644 --- a/gnuradio-core/src/lib/general/gr_int_to_float.i +++ b/gnuradio-core/src/lib/general/gr_int_to_float.i @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2011 Free Software Foundation, Inc. + * Copyright 2011,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -22,9 +22,11 @@ GR_SWIG_BLOCK_MAGIC(gr,int_to_float) -gr_int_to_float_sptr gr_make_int_to_float (); +gr_int_to_float_sptr +gr_make_int_to_float (size_t vlen=1, float scale=1); class gr_int_to_float : public gr_sync_block { - gr_int_to_float (); + float scale() const; + void set_scale(float scale); }; diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.i b/gnuradio-core/src/lib/general/gr_short_to_char.i index 8fa453a06..330a4fdda 100644 --- a/gnuradio-core/src/lib/general/gr_short_to_char.i +++ b/gnuradio-core/src/lib/general/gr_short_to_char.i @@ -22,7 +22,8 @@ GR_SWIG_BLOCK_MAGIC(gr,short_to_char) -gr_short_to_char_sptr gr_make_short_to_char (size_t vlen=1); +gr_short_to_char_sptr +gr_make_short_to_char (size_t vlen=1); class gr_short_to_char : public gr_sync_block { diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc index 9f71f52ed..94d376a27 100644 --- a/gnuradio-core/src/lib/general/gr_short_to_float.cc +++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc @@ -71,7 +71,6 @@ gr_short_to_float::work (int noutput_items, else { volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items); } - return noutput_items; } diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py b/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py index edfc26409..530b2a5cc 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py @@ -44,6 +44,26 @@ class test_int_to_float (gr_unittest.TestCase): self.assertFloatTuplesAlmostEqual(expected_result, result_data) + def test_002(self): + + vlen = 3 + src_data = ( 65000, 65001, 65002, 65003, 65004, 65005, + -65001, -65002, -65003) + expected_result = [ 65000.0, 65001.0, 65002.0, + 65003.0, 65004.0, 65005.0, + -65001.0, -65002.0, -65003.0] + src = gr.vector_source_i(src_data) + s2v = gr.stream_to_vector(gr.sizeof_int, vlen) + op = gr.int_to_float(vlen) + v2s = gr.vector_to_stream(gr.sizeof_float, vlen) + dst = gr.vector_sink_f() + + self.tb.connect(src, s2v, op, v2s, dst) + self.tb.run() + result_data = list(dst.data()) + + self.assertEqual(expected_result, result_data) + if __name__ == '__main__': gr_unittest.run(test_int_to_float, "test_int_to_float.xml") -- cgit From f6075ca94945510eddc5581b552f5e61ce1d0c46 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 19:25:41 -0500 Subject: core: more type converters in Volk; adding char_to_short converter. --- gnuradio-core/src/lib/general/CMakeLists.txt | 3 ++ gnuradio-core/src/lib/general/Makefile.am | 2 + gnuradio-core/src/lib/general/general.i | 6 +++ gnuradio-core/src/lib/general/gr_char_to_short.cc | 64 +++++++++++++++++++++++ gnuradio-core/src/lib/general/gr_char_to_short.h | 55 +++++++++++++++++++ gnuradio-core/src/lib/general/gr_char_to_short.i | 30 +++++++++++ 6 files changed, 160 insertions(+) create mode 100644 gnuradio-core/src/lib/general/gr_char_to_short.cc create mode 100644 gnuradio-core/src/lib/general/gr_char_to_short.h create mode 100644 gnuradio-core/src/lib/general/gr_char_to_short.i diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index 6ecaa930a..6635abc8d 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -187,6 +187,7 @@ set(gr_core_general_triple_threats gr_bin_statistics_f gr_bytes_to_syms gr_char_to_float + gr_char_to_short gr_check_counting_s gr_check_lfsr_32k_s gr_complex_to_interleaved_short @@ -229,6 +230,7 @@ set(gr_core_general_triple_threats gr_kludge_copy gr_lfsr_32k_source_s gr_map_bb + gr_multiply_cc gr_nlog10_ff gr_nop gr_null_sink @@ -256,6 +258,7 @@ set(gr_core_general_triple_threats gr_rms_ff gr_repeat gr_short_to_float + gr_short_to_char gr_simple_correlator gr_simple_framer gr_simple_squelch_cc diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index 0122932cf..1b802b09c 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -46,6 +46,7 @@ libgeneral_la_SOURCES = \ gr_bin_statistics_f.cc \ gr_bytes_to_syms.cc \ gr_char_to_float.cc \ + gr_char_to_short.cc \ gr_check_counting_s.cc \ gr_check_lfsr_32k_s.cc \ gr_circular_file.cc \ @@ -123,6 +124,7 @@ libgeneral_la_SOURCES = \ gr_rms_cf.cc \ gr_rms_ff.cc \ gr_short_to_float.cc \ + gr_short_to_int.cc \ gr_int_to_float.cc \ gr_simple_correlator.cc \ gr_simple_framer.cc \ diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i index 5a701bf80..ec90e40e5 100644 --- a/gnuradio-core/src/lib/general/general.i +++ b/gnuradio-core/src/lib/general/general.i @@ -44,8 +44,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -104,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -158,8 +161,10 @@ %include "gr_float_to_char.i" %include "gr_float_to_uchar.i" %include "gr_short_to_float.i" +%include "gr_short_to_char.i" %include "gr_int_to_float.i" %include "gr_char_to_float.i" +%include "gr_char_to_short.i" %include "gr_uchar_to_float.i" %include "gr_frequency_modulator_fc.i" %include "gr_phase_modulator_fc.i" @@ -218,6 +223,7 @@ %include "gr_diff_decoder_bb.i" %include "gr_framer_sink_1.i" %include "gr_map_bb.i" +%include "gr_multiply_cc.i" %include "gr_feval.i" %include "gr_pwr_squelch_cc.i" %include "gr_pwr_squelch_ff.i" diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc new file mode 100644 index 000000000..40ffa9338 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc @@ -0,0 +1,64 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_char_to_short_sptr +gr_make_char_to_short (size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_char_to_short (vlen)); +} + +gr_char_to_short::gr_char_to_short (size_t vlen) + : gr_sync_block ("gr_char_to_short", + gr_make_io_signature (1, 1, sizeof (char)*vlen), + gr_make_io_signature (1, 1, sizeof (short)*vlen)), + d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); +} + +int +gr_char_to_short::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + const int8_t *in = (const int8_t *) input_items[0]; + int16_t *out = (int16_t *) output_items[0]; + + if(is_unaligned()) { + volk_8i_convert_16i_u(out, in, d_vlen*noutput_items); + } + else { + volk_8i_convert_16i_a(out, in, d_vlen*noutput_items); + } + + return noutput_items; +} diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.h b/gnuradio-core/src/lib/general/gr_char_to_short.h new file mode 100644 index 000000000..8ed974acf --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_char_to_short.h @@ -0,0 +1,55 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011,2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_CHAR_TO_SHORT_H +#define INCLUDED_GR_CHAR_TO_SHORT_H + +#include +#include + +class gr_char_to_short; +typedef boost::shared_ptr gr_char_to_short_sptr; + +GR_CORE_API gr_char_to_short_sptr +gr_make_char_to_short (size_t vlen=1); + +/*! + * \brief Convert stream of chars to a stream of float + * \ingroup converter_blk + */ + +class GR_CORE_API gr_char_to_short : public gr_sync_block +{ + friend GR_CORE_API gr_char_to_short_sptr + gr_make_char_to_short (size_t vlen); + gr_char_to_short (size_t vlen); + + size_t d_vlen; + + public: + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_CHAR_TO_SHORT_H */ diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.i b/gnuradio-core/src/lib/general/gr_char_to_short.i new file mode 100644 index 000000000..48ddbf26b --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_char_to_short.i @@ -0,0 +1,30 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,char_to_short) + +gr_char_to_short_sptr gr_make_char_to_short (size_t vlen=1); + +class gr_char_to_short : public gr_sync_block +{ + +}; -- cgit From 7ecd13dbeebb5083d309b2a74d158bb1bb26066e Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 19:29:45 -0500 Subject: core: update Makefile.am for new type converters. --- gnuradio-core/src/lib/general/CMakeLists.txt | 2 +- gnuradio-core/src/lib/general/Makefile.am | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index 6635abc8d..6dc9d411d 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2010-2011 Free Software Foundation, Inc. +# Copyright 2010-2012 Free Software Foundation, Inc. # # This file is part of GNU Radio # diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index 1b802b09c..65b5a729e 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2001,2002,2004,2006,2007,2008,2009 Free Software Foundation, Inc. +# Copyright 2001,2002,2004,2006-2012 Free Software Foundation, Inc. # # This file is part of GNU Radio # @@ -197,6 +197,7 @@ grinclude_HEADERS = \ gr_bin_statistics_f.h \ gr_bytes_to_syms.h \ gr_char_to_float.h \ + gr_char_to_short.h \ gr_check_counting_s.h \ gr_check_lfsr_32k_s.h \ gr_circular_file.h \ @@ -278,6 +279,7 @@ grinclude_HEADERS = \ gr_reverse.h \ gr_rms_cf.h \ gr_rms_ff.h \ + gr_short_to_char.h \ gr_short_to_float.h \ gr_int_to_float.h \ gr_simple_correlator.h \ @@ -325,7 +327,7 @@ grinclude_HEADERS = \ gri_int_to_float.h \ gri_lfsr_15_1_0.h \ gri_lfsr_32k.h \ - gri_short_to_float.h \ + gri_short_to_char.h \ gri_uchar_to_float.h \ malloc16.h \ random.h \ @@ -363,6 +365,7 @@ swiginclude_HEADERS = \ gr_bin_statistics_f.i \ gr_bytes_to_syms.i \ gr_char_to_float.i \ + gr_char_to_short.i \ gr_check_counting_s.i \ gr_check_lfsr_32k_s.i \ gr_complex_to_interleaved_short.i \ @@ -432,6 +435,7 @@ swiginclude_HEADERS = \ gr_rms_cf.i \ gr_rms_ff.i \ gr_repeat.i \ + gr_short_to_char.i \ gr_short_to_float.i \ gr_simple_correlator.i \ gr_simple_framer.i \ -- cgit From d870487437b33d1a26eb8f5f5aaa414ca6ed24e0 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 28 Jan 2012 19:31:33 -0500 Subject: volk: fix lower bound of int conversion. --- volk/include/volk/volk_32f_s32f_convert_32i_a.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h index 15fa282fb..8f2fc791e 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h @@ -22,7 +22,7 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; - float min_val = -2147483647; + float min_val = -2147483648; float max_val = 2147483647; float r; @@ -71,7 +71,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const const float* inputVectorPtr = (const float*)inputVector; int32_t* outputVectorPtr = outputVector; - float min_val = -2147483647; + float min_val = -2147483648; float max_val = 2147483647; float r; -- cgit From 9c9b9a8aa05c7cde7cd11cd5d1e052630715d252 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 29 Jan 2012 17:31:37 -0500 Subject: volk: added unaligned volk function for magnitude of a complex number. --- volk/include/volk/volk_32fc_magnitude_32f_u.h | 118 ++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 volk/include/volk/volk_32fc_magnitude_32f_u.h diff --git a/volk/include/volk/volk_32fc_magnitude_32f_u.h b/volk/include/volk/volk_32fc_magnitude_32f_u.h new file mode 100644 index 000000000..ed1cedef9 --- /dev/null +++ b/volk/include/volk/volk_32fc_magnitude_32f_u.h @@ -0,0 +1,118 @@ +#ifndef INCLUDED_volk_32fc_magnitude_32f_u_H +#define INCLUDED_volk_32fc_magnitude_32f_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, result; + for(;number < quarterPoints; number++){ + cplxValue1 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values + cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values + + result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values + + result = _mm_sqrt_ps(result); + + _mm_storeu_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_SSE +#include + /*! + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, iValue, qValue, result; + for(;number < quarterPoints; number++){ + cplxValue1 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + // Arrange in i1i2i3i4 format + iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0)); + // Arrange in q1q2q3q4 format + qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1)); + + iValue = _mm_mul_ps(iValue, iValue); // Square the I values + qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values + + result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values + + result = _mm_sqrt_ps(result); + + _mm_storeu_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_32f_u_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + unsigned int number = 0; + for(number = 0; number < num_points; number++){ + const float real = *complexVectorPtr++; + const float imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag)); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */ -- cgit From d142fd9e31715bae85d65c4790f278143a784142 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 29 Jan 2012 17:32:30 -0500 Subject: volk: added volk magnitiude squared functions (aligned/unaligned) for complex numbers. --- volk/include/volk/Makefile.am | 3 + .../volk/volk_32fc_magnitude_squared_32f_a.h | 114 +++++++++++++++++++++ .../volk/volk_32fc_magnitude_squared_32f_u.h | 114 +++++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 volk/include/volk/volk_32fc_magnitude_squared_32f_a.h create mode 100644 volk/include/volk/volk_32fc_magnitude_squared_32f_u.h diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index e7333a015..c2502f6e6 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -71,6 +71,9 @@ volkinclude_HEADERS = \ volk_32fc_index_max_16u_a.h \ volk_32fc_s32f_magnitude_16i_a.h \ volk_32fc_magnitude_32f_a.h \ + volk_32fc_magnitude_32f_u.h \ + volk_32fc_magnitude_squared_32f_a.h \ + volk_32fc_magnitude_squared_32f_u.h \ volk_32fc_x2_multiply_32fc_a.h \ volk_32f_s32f_convert_16i_a.h \ volk_32f_s32f_convert_16i_u.h \ diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h new file mode 100644 index 000000000..00bdefbb5 --- /dev/null +++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h @@ -0,0 +1,114 @@ +#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H +#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, result; + for(;number < quarterPoints; number++){ + cplxValue1 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values + cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values + + result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values + + _mm_store_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_SSE +#include + /*! + \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, iValue, qValue, result; + for(;number < quarterPoints; number++){ + cplxValue1 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + // Arrange in i1i2i3i4 format + iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0)); + // Arrange in q1q2q3q4 format + qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1)); + + iValue = _mm_mul_ps(iValue, iValue); // Square the I values + qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values + + result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values + + _mm_store_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + unsigned int number = 0; + for(number = 0; number < num_points; number++){ + const float real = *complexVectorPtr++; + const float imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (real*real) + (imag*imag); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h new file mode 100644 index 000000000..6eb4a523a --- /dev/null +++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h @@ -0,0 +1,114 @@ +#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H +#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, result; + for(;number < quarterPoints; number++){ + cplxValue1 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values + cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values + + result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values + + _mm_storeu_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_SSE +#include + /*! + \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, iValue, qValue, result; + for(;number < quarterPoints; number++){ + cplxValue1 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + // Arrange in i1i2i3i4 format + iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0)); + // Arrange in q1q2q3q4 format + qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1)); + + iValue = _mm_mul_ps(iValue, iValue); // Square the I values + qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values + + result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values + + _mm_storeu_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_32fc_magnitude_squared_32f_u_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + const float* complexVectorPtr = (float*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + unsigned int number = 0; + for(number = 0; number < num_points; number++){ + const float real = *complexVectorPtr++; + const float imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (real*real) + (imag*imag); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */ -- cgit From 812144f4c153dca385b65e79401c9f1d88b90173 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 29 Jan 2012 17:34:06 -0500 Subject: core: switched complex to mag and mag_squared to use Volk functions. --- gnuradio-core/src/lib/general/gr_complex_to_xxx.cc | 25 +++++++++++++++------- gnuradio-core/src/lib/general/gr_complex_to_xxx.h | 5 +++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc index a59c127f3..0bdd06547 100644 --- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc +++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc @@ -1,6 +1,6 @@ /* -*- c++ -*- */ /* - * Copyright 2004,2008,2010 Free Software Foundation, Inc. + * Copyright 2004,2008,2010,2012 Free Software Foundation, Inc. * * This file is part of GNU Radio * @@ -27,6 +27,7 @@ #include #include #include +#include // ---------------------------------------------------------------- @@ -152,6 +153,9 @@ gr_complex_to_mag::gr_complex_to_mag (unsigned int vlen) gr_make_io_signature (1, 1, sizeof (float) * vlen)), d_vlen(vlen) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -163,9 +167,9 @@ gr_complex_to_mag::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; - for (int i = 0; i < noi; i++){ - out[i] = std::abs (in[i]); - } + // turned out to be faster than aligned/unaligned switching + volk_32fc_magnitude_32f_u(out, in, noi); + return noutput_items; } @@ -183,6 +187,9 @@ gr_complex_to_mag_squared::gr_complex_to_mag_squared (unsigned int vlen) gr_make_io_signature (1, 1, sizeof (float) * vlen)), d_vlen(vlen) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -194,11 +201,13 @@ gr_complex_to_mag_squared::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; - for (int i = 0; i < noi; i++){ - const float __x = in[i].real(); - const float __y = in[i].imag(); - out[i] = __x * __x + __y * __y; + if(unaligned()) { + volk_32fc_magnitude_squared_32f_u(out, in, noi); + } + else { + volk_32fc_magnitude_squared_32f_a(out, in, noi); } + return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.h b/gnuradio-core/src/lib/general/gr_complex_to_xxx.h index 166403259..232071323 100644 --- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.h +++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.h @@ -109,10 +109,11 @@ class GR_CORE_API gr_complex_to_imag : public gr_sync_block */ class GR_CORE_API gr_complex_to_mag : public gr_sync_block { - friend GR_CORE_API gr_complex_to_mag_sptr gr_make_complex_to_mag (unsigned int vlen); + friend GR_CORE_API gr_complex_to_mag_sptr + gr_make_complex_to_mag (unsigned int vlen); gr_complex_to_mag (unsigned int vlen); - unsigned int d_vlen; + unsigned int d_vlen; public: virtual int work (int noutput_items, -- cgit From 83d0bf5d513e516d700e552fa2773dd852a6608a Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 29 Jan 2012 17:35:09 -0500 Subject: core: minor edits. --- gnuradio-core/src/lib/general/gr_char_to_short.h | 1 + gnuradio-core/src/lib/runtime/gr_block_executor.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.h b/gnuradio-core/src/lib/general/gr_char_to_short.h index 8ed974acf..58f9a62b0 100644 --- a/gnuradio-core/src/lib/general/gr_char_to_short.h +++ b/gnuradio-core/src/lib/general/gr_char_to_short.h @@ -39,6 +39,7 @@ gr_make_char_to_short (size_t vlen=1); class GR_CORE_API gr_char_to_short : public gr_sync_block { + private: friend GR_CORE_API gr_char_to_short_sptr gr_make_char_to_short (size_t vlen); gr_char_to_short (size_t vlen); diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc index 3191246a7..02bb4873c 100644 --- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc +++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc @@ -36,7 +36,7 @@ #include // must be defined to either 0 or 1 -#define ENABLE_LOGGING 1 +#define ENABLE_LOGGING 0 #if (ENABLE_LOGGING) #define LOG(x) do { x; } while(0) -- cgit From cb458204245632ac7a571bfe96b90d3c55a2f01a Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 30 Jan 2012 00:22:44 -0500 Subject: volk: adding complex to imag kernel. --- volk/include/volk/Makefile.am | 1 + .../volk/volk_32fc_deinterleave_imag_32f_a.h | 68 ++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index c2502f6e6..c5b99c41b 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -65,6 +65,7 @@ volkinclude_HEADERS = \ volk_32fc_deinterleave_64f_x2_a.h \ volk_32fc_s32f_deinterleave_real_16i_a.h \ volk_32fc_deinterleave_real_32f_a.h \ + volk_32fc_deinterleave_imag_32f_a.h \ volk_32fc_deinterleave_real_64f_a.h \ volk_32fc_x2_dot_prod_32fc_a.h \ volk_32fc_x2_dot_prod_32fc_u.h \ diff --git a/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h new file mode 100644 index 000000000..adc4112b9 --- /dev/null +++ b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h @@ -0,0 +1,68 @@ +#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H +#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H + +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*! + \brief Deinterleaves the complex vector into Q vector data + \param complexVector The complex input vector + \param qBuffer The Q buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (const float*)complexVector; + float* qBufferPtr = qBuffer; + + __m128 cplxValue1, cplxValue2, iValue; + for(;number < quarterPoints; number++){ + + cplxValue1 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + // Arrange in q1q2q3q4 format + iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1)); + + _mm_store_ps(qBufferPtr, iValue); + + qBufferPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + complexVectorPtr++; + *qBufferPtr++ = *complexVectorPtr++; + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Deinterleaves the complex vector into Q vector data + \param complexVector The complex input vector + \param qBuffer The I buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +static inline void volk_32fc_deinterleave_imag_32f_a_generic(float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const float* complexVectorPtr = (float*)complexVector; + float* qBufferPtr = qBuffer; + for(number = 0; number < num_points; number++){ + complexVectorPtr++; + *qBufferPtr++ = *complexVectorPtr++; + } +} +#endif /* LV_HAVE_GENERIC */ + + + + +#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */ -- cgit From 4769d5ff75520661ce52fae229482a96e651f7e2 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 30 Jan 2012 00:23:11 -0500 Subject: core: complex_to_xxx (float, real, imag, arg) to volk. --- gnuradio-core/src/lib/general/gr_complex_to_xxx.cc | 60 +++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc index 0bdd06547..9ed86f368 100644 --- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc +++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc @@ -57,17 +57,41 @@ gr_complex_to_float::work (int noutput_items, switch (output_items.size ()){ case 1: +#if 1 + if(is_unaligned()) { + for (int i = 0; i < noi; i++){ + out0[i] = in[i].real (); + } + } + else { + volk_32fc_deinterleave_real_32f_a(out0, in, noi); + } +#else for (int i = 0; i < noi; i++){ out0[i] = in[i].real (); } +#endif break; case 2: +#if 1 + out1 = (float *) output_items[1]; + if(is_unaligned()) { + for (int i = 0; i < noi; i++){ + out0[i] = in[i].real (); + out1[i] = in[i].imag (); + } + } + else { + volk_32fc_deinterleave_32f_x2_a(out0, out1, in, noi); + } +#else out1 = (float *) output_items[1]; for (int i = 0; i < noi; i++){ out0[i] = in[i].real (); out1[i] = in[i].imag (); } +#endif break; default: @@ -102,9 +126,21 @@ gr_complex_to_real::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; +#if 1 + if(is_unaligned()) { + for (int i = 0; i < noi; i++){ + out[i] = in[i].real (); + } + } + else { + volk_32fc_deinterleave_real_32f_a(out, in, noi); + } +#else for (int i = 0; i < noi; i++){ - out[i] = in[i].real (); + out0[i] = in[i].real (); } +#endif + return noutput_items; } @@ -133,9 +169,20 @@ gr_complex_to_imag::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; +#if 1 + if(is_unaligned()) { + for (int i = 0; i < noi; i++){ + out[i] = in[i].imag (); + } + } + else { + volk_32fc_deinterleave_imag_32f_a(out, in, noi); + } +#else for (int i = 0; i < noi; i++){ out[i] = in[i].imag (); } +#endif return noutput_items; } @@ -236,9 +283,20 @@ gr_complex_to_arg::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; +#if 1 + if(is_unaligned()) { + for (int i = 0; i < noi; i++){ + out[i] = gr_fast_atan2f(in[i]); + } + } + else { + volk_32fc_s32f_atan2_32f_a(out, in, 1, noi); + } +#else for (int i = 0; i < noi; i++){ // out[i] = std::arg (in[i]); out[i] = gr_fast_atan2f(in[i]); } +#endif return noutput_items; } -- cgit From 046385126d92cf9179ac84ede06d3d50e1c9030f Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 30 Jan 2012 12:12:44 -0500 Subject: core: fixing up complex_to_xxx for using Volk where appropriate. Speed benchmark were used to decide which implementation to use. --- gnuradio-core/src/lib/general/gr_complex_to_xxx.cc | 54 +++++++--------------- .../src/python/gnuradio/gr/qa_complex_to_xxx.py | 2 +- 2 files changed, 17 insertions(+), 39 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc index 9ed86f368..108a92835 100644 --- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc +++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc @@ -43,6 +43,9 @@ gr_complex_to_float::gr_complex_to_float (unsigned int vlen) gr_make_io_signature (1, 2, sizeof (float) * vlen)), d_vlen(vlen) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -57,7 +60,6 @@ gr_complex_to_float::work (int noutput_items, switch (output_items.size ()){ case 1: -#if 1 if(is_unaligned()) { for (int i = 0; i < noi; i++){ out0[i] = in[i].real (); @@ -66,15 +68,9 @@ gr_complex_to_float::work (int noutput_items, else { volk_32fc_deinterleave_real_32f_a(out0, in, noi); } -#else - for (int i = 0; i < noi; i++){ - out0[i] = in[i].real (); - } -#endif break; case 2: -#if 1 out1 = (float *) output_items[1]; if(is_unaligned()) { for (int i = 0; i < noi; i++){ @@ -85,13 +81,6 @@ gr_complex_to_float::work (int noutput_items, else { volk_32fc_deinterleave_32f_x2_a(out0, out1, in, noi); } -#else - out1 = (float *) output_items[1]; - for (int i = 0; i < noi; i++){ - out0[i] = in[i].real (); - out1[i] = in[i].imag (); - } -#endif break; default: @@ -115,6 +104,9 @@ gr_complex_to_real::gr_complex_to_real (unsigned int vlen) gr_make_io_signature (1, 1, sizeof (float) * vlen)), d_vlen(vlen) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -126,7 +118,6 @@ gr_complex_to_real::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; -#if 1 if(is_unaligned()) { for (int i = 0; i < noi; i++){ out[i] = in[i].real (); @@ -135,11 +126,6 @@ gr_complex_to_real::work (int noutput_items, else { volk_32fc_deinterleave_real_32f_a(out, in, noi); } -#else - for (int i = 0; i < noi; i++){ - out0[i] = in[i].real (); - } -#endif return noutput_items; } @@ -158,6 +144,9 @@ gr_complex_to_imag::gr_complex_to_imag (unsigned int vlen) gr_make_io_signature (1, 1, sizeof (float) * vlen)), d_vlen(vlen) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -169,7 +158,6 @@ gr_complex_to_imag::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; -#if 1 if(is_unaligned()) { for (int i = 0; i < noi; i++){ out[i] = in[i].imag (); @@ -178,11 +166,7 @@ gr_complex_to_imag::work (int noutput_items, else { volk_32fc_deinterleave_imag_32f_a(out, in, noi); } -#else - for (int i = 0; i < noi; i++){ - out[i] = in[i].imag (); - } -#endif + return noutput_items; } @@ -248,7 +232,7 @@ gr_complex_to_mag_squared::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; - if(unaligned()) { + if(is_unaligned()) { volk_32fc_magnitude_squared_32f_u(out, in, noi); } else { @@ -272,6 +256,9 @@ gr_complex_to_arg::gr_complex_to_arg (unsigned int vlen) gr_make_io_signature (1, 1, sizeof (float) * vlen)), d_vlen(vlen) { + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); } int @@ -283,20 +270,11 @@ gr_complex_to_arg::work (int noutput_items, float *out = (float *) output_items[0]; int noi = noutput_items * d_vlen; -#if 1 - if(is_unaligned()) { - for (int i = 0; i < noi; i++){ - out[i] = gr_fast_atan2f(in[i]); - } - } - else { - volk_32fc_s32f_atan2_32f_a(out, in, 1, noi); - } -#else + // The fast_atan2f is faster than Volk for (int i = 0; i < noi; i++){ // out[i] = std::arg (in[i]); out[i] = gr_fast_atan2f(in[i]); } -#endif + return noutput_items; } diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py b/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py index 76627247b..01679dc05 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py @@ -134,7 +134,7 @@ class test_complex_ops (gr_unittest.TestCase): self.tb.run () actual_result = dst.data () - self.assertFloatTuplesAlmostEqual (expected_result, actual_result, 5) + self.assertFloatTuplesAlmostEqual (expected_result, actual_result, 3) if __name__ == '__main__': -- cgit From 6385380c812a8cd1470073d01c7e8b6006b8f398 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 30 Jan 2012 18:50:29 -0500 Subject: core: redo fft_filter (complex and float) with Volk. No need for sse implementation now but keeping code for reference. --- gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc | 5 ----- gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc | 7 +------ gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc | 5 ++--- gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc | 5 ++--- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc index 9fa98cc69..d523404c9 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc @@ -30,7 +30,6 @@ #endif #include -//#include #include #include #include @@ -57,11 +56,7 @@ gr_fft_filter_ccc::gr_fft_filter_ccc (int decimation, const std::vectorset_taps(taps); set_output_multiple(d_nsamples); diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc index c0a9b3483..640851a1d 100644 --- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc +++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc @@ -26,7 +26,6 @@ #include #include -//#include #include #include #include @@ -50,11 +49,7 @@ gr_fft_filter_fff::gr_fft_filter_fff (int decimation, const std::vector & { set_history(1); -#if 1 // don't enable the sse version until handling it is worked out - d_filter = new gri_fft_filter_fff_generic(decimation, taps); -#else - d_filter = new gri_fft_filter_fff_sse(decimation, taps); -#endif + d_filter = new gri_fft_filter_fff_generic(decimation, taps); d_new_taps = taps; d_nsamples = d_filter->set_taps(taps); set_output_multiple(d_nsamples); diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc index 891905dd0..d9700ad2e 100644 --- a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -137,9 +138,7 @@ gri_fft_filter_ccc_generic::filter (int nitems, const gr_complex *input, gr_comp gr_complex *b = &d_xformed_taps[0]; gr_complex *c = d_invfft->get_inbuf(); - for (j = 0; j < d_fftsize; j+=1) { // filter in the freq domain - c[j] = a[j] * b[j]; - } + volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize); d_invfft->execute(); // compute inv xform diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc index b3fbe1d1a..64705ee5e 100644 --- a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc +++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -124,9 +125,7 @@ gri_fft_filter_fff_generic::filter (int nitems, const float *input, float *outpu gr_complex *b = &d_xformed_taps[0]; gr_complex *c = d_invfft->get_inbuf(); - for (j = 0; j < d_fftsize/2+1; j++) { // filter in the freq domain - c[j] = a[j] * b[j]; - } + volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize/2+1); d_invfft->execute(); // compute inv xform -- cgit From 070a6c9ce93c2f2043a0145e253cc55f801f9433 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 14:25:53 -0500 Subject: volk: adding unaligned versions of complex multiply a constant and complex multiply 2 streams. --- .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 42 ++++++++++++ volk/include/volk/volk_32fc_x2_multiply_32fc_u.h | 77 ++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h create mode 100644 volk/include/volk/volk_32fc_x2_multiply_32fc_u.h diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h new file mode 100644 index 000000000..201dcf5f6 --- /dev/null +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h @@ -0,0 +1,42 @@ +#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H +#define INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_GENERIC + /*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = num_points; + + // unwrap loop + while (number >= 8){ + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + number -= 8; + } + + // clean up any remaining + while (number-- > 0) + *cPtr++ = *aPtr++ * scalar; +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */ diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h new file mode 100644 index 000000000..729c1a4ad --- /dev/null +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h @@ -0,0 +1,77 @@ +#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H +#define INCLUDED_volk_32fc_x2_multiply_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, y, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + const lv_32fc_t* b = bVector; + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_storeu_ps((float*)c,z); // Store the results back into the C container + + a += 2; + b += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * (*b); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_x2_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + const lv_32fc_t* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */ -- cgit From c6519775ac7d1e9098ca2fb20c09e15ae8e60a4a Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 16:34:23 -0500 Subject: sched: better working alignment handling. Works with max_noutput_items and set_output_multiple. --- gnuradio-core/src/lib/runtime/gr_block_executor.cc | 53 ++++++++++------------ 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc index 02bb4873c..11c6639b3 100644 --- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc +++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc @@ -83,11 +83,6 @@ min_available_space (gr_block_detail *d, int output_multiple) } return 0; } - else if (n > output_multiple) { - // adjust this or we often ask for too many, - // causing a re-calc for fewer items. - n = n-output_multiple; - } min_space = std::min (min_space, n); } return min_space; @@ -316,7 +311,7 @@ gr_block_executor::run_one_iteration() // only test this if we specifically set the output_multiple if(m->output_multiple_set()) - reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple()); + reqd_noutput_items = round_down(reqd_noutput_items, m->output_multiple()); if (reqd_noutput_items > 0 && reqd_noutput_items <= noutput_items) noutput_items = reqd_noutput_items; @@ -329,32 +324,34 @@ gr_block_executor::run_one_iteration() // Check if we're still unaligned; use up items until we're // aligned again. Otherwise, make sure we set the alignment // requirement. - if(m->is_unaligned()) { - // When unaligned, don't just set noutput_items to the remaining - // samples to meet alignment; this causes too much overhead in - // requiring a premature call back here. Set the maximum amount - // of samples to handle unalignment and get us back aligned. - if(noutput_items >= m->unaligned()) { - noutput_items = round_up(noutput_items, m->alignment()) \ - - (m->alignment() - m->unaligned()); - new_alignment = 0; + if(!m->output_multiple_set()) { + if(m->is_unaligned()) { + // When unaligned, don't just set noutput_items to the remaining + // samples to meet alignment; this causes too much overhead in + // requiring a premature call back here. Set the maximum amount + // of samples to handle unalignment and get us back aligned. + if(noutput_items >= m->unaligned()) { + noutput_items = round_up(noutput_items, m->alignment()) \ + - (m->alignment() - m->unaligned()); + new_alignment = 0; + } + else { + new_alignment = m->unaligned() - noutput_items; + } + } + else if(noutput_items < m->alignment()) { + // if we don't have enough for an aligned call, keep track of + // misalignment, set unaligned flag, and proceed. + new_alignment = m->alignment() - noutput_items; + m->set_unaligned(new_alignment); + m->set_is_unaligned(true); } else { - new_alignment = m->unaligned() - noutput_items; + // enough to round down to the nearest alignment and process. + noutput_items = round_down(noutput_items, m->alignment()); + m->set_is_unaligned(false); } } - else if(noutput_items < m->alignment()) { - // if we don't have enough for an aligned call, keep track of - // misalignment, set unaligned flag, and proceed. - new_alignment = m->alignment() - noutput_items; - m->set_unaligned(new_alignment); - m->set_is_unaligned(true); - } - else { - // enough to round down to the nearest alignment and process. - noutput_items = round_down(noutput_items, m->alignment()); - m->set_is_unaligned(false); - } // ask the block how much input they need to produce noutput_items m->forecast (noutput_items, d_ninput_items_required); -- cgit From 67d23bdecd3f15197bdf46f8c0cd66a6f754fea5 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 16:35:14 -0500 Subject: volk: improving performance of multiply_const and multiply two streams. --- .../include/volk/volk_32fc_s32fc_multiply_32fc_a.h | 63 +++++++++++++++++++++- .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 45 ++++++++++++++++ volk/include/volk/volk_32fc_x2_multiply_32fc_a.h | 1 - 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h index b27a7259f..205461afb 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h @@ -6,6 +6,52 @@ #include #include +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + // Set up constant scalar vector + yl = _mm_set_ps1(lv_creal(scalar)); + yh = _mm_set_ps1(lv_cimag(scalar)); + + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_store_ps((float*)c,z); // Store the results back into the C container + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * scalar; + } +} +#endif /* LV_HAVE_SSE */ + + #ifdef LV_HAVE_GENERIC /*! \brief Multiplies the two input complex vectors and stores their results in the third vector @@ -17,11 +63,24 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; - unsigned int number = 0; + unsigned int number = num_points; - for(number = 0; number < num_points; number++){ + // unwrap loop + while (number >= 8){ + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + number -= 8; } + + // clean up any remaining + while (number-- > 0) + *cPtr++ = *aPtr++ * scalar; } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h index 201dcf5f6..a9dfcda19 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h @@ -6,6 +6,51 @@ #include #include +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + // Set up constant scalar vector + yl = _mm_set_ps1(lv_creal(scalar)); + yh = _mm_set_ps1(lv_cimag(scalar)); + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_storeu_ps((float*)c,z); // Store the results back into the C container + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * scalar; + } +} +#endif /* LV_HAVE_SSE */ + #ifdef LV_HAVE_GENERIC /*! \brief Multiplies the two input complex vectors and stores their results in the third vector diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h index 18dd092e8..aec8bd716 100644 --- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h @@ -23,7 +23,6 @@ static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const l lv_32fc_t* c = cVector; const lv_32fc_t* a = aVector; const lv_32fc_t* b = bVector; - for(;number < halfPoints; number++){ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi -- cgit From e8089db25b2e28824f11d27c9d98a4adef191736 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 16:36:47 -0500 Subject: core: moving multiply_cc and multiply_const_cc out of gengen and into general so they can make use of volk calls. QA code now explicitly tests the cc versions of these blocks. --- gnuradio-core/src/lib/general/.gitignore | 6 -- gnuradio-core/src/lib/general/CMakeLists.txt | 1 + gnuradio-core/src/lib/general/general.i | 2 + gnuradio-core/src/lib/general/gr_multiply_cc.cc | 69 ++++++++++++++++++ gnuradio-core/src/lib/general/gr_multiply_cc.h | 56 +++++++++++++++ gnuradio-core/src/lib/general/gr_multiply_cc.i | 32 +++++++++ .../src/lib/general/gr_multiply_const_cc.cc | 82 ++++++++++++++++++++++ .../src/lib/general/gr_multiply_const_cc.h | 60 ++++++++++++++++ .../src/lib/general/gr_multiply_const_cc.i | 33 +++++++++ gnuradio-core/src/lib/gengen/.gitignore | 6 -- gnuradio-core/src/lib/gengen/CMakeLists.txt | 4 +- gnuradio-core/src/lib/gengen/Makefile.gen | 6 -- gnuradio-core/src/lib/gengen/generate_common.py | 6 +- .../src/python/gnuradio/gr/qa_add_and_friends.py | 20 ++++++ 14 files changed, 360 insertions(+), 23 deletions(-) create mode 100644 gnuradio-core/src/lib/general/gr_multiply_cc.cc create mode 100644 gnuradio-core/src/lib/general/gr_multiply_cc.h create mode 100644 gnuradio-core/src/lib/general/gr_multiply_cc.i create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_cc.cc create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_cc.h create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_cc.i diff --git a/gnuradio-core/src/lib/general/.gitignore b/gnuradio-core/src/lib/general/.gitignore index 4f3696f58..349651e0c 100644 --- a/gnuradio-core/src/lib/general/.gitignore +++ b/gnuradio-core/src/lib/general/.gitignore @@ -158,12 +158,6 @@ /gr_divide_ss.cc /gr_divide_ss.h /gr_divide_ss.i -/gr_multiply_cc.cc -/gr_multiply_cc.h -/gr_multiply_cc.i -/gr_multiply_const_cc.cc -/gr_multiply_const_cc.h -/gr_multiply_const_cc.i /gr_multiply_const_ff.cc /gr_multiply_const_ff.h /gr_multiply_const_ff.i diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index 6dc9d411d..6afdfe27c 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -231,6 +231,7 @@ set(gr_core_general_triple_threats gr_lfsr_32k_source_s gr_map_bb gr_multiply_cc + gr_multiply_const_cc gr_nlog10_ff gr_nop gr_null_sink diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i index ec90e40e5..7de13258e 100644 --- a/gnuradio-core/src/lib/general/general.i +++ b/gnuradio-core/src/lib/general/general.i @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -224,6 +225,7 @@ %include "gr_framer_sink_1.i" %include "gr_map_bb.i" %include "gr_multiply_cc.i" +%include "gr_multiply_const_cc.i" %include "gr_feval.i" %include "gr_pwr_squelch_cc.i" %include "gr_pwr_squelch_ff.i" diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_cc.cc new file mode 100644 index 000000000..0d20e6257 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_cc.cc @@ -0,0 +1,69 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_multiply_cc_sptr +gr_make_multiply_cc (size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_multiply_cc (vlen)); +} + +gr_multiply_cc::gr_multiply_cc (size_t vlen) + : gr_sync_block ("gr_multiply_cc", + gr_make_io_signature (1, -1, sizeof (gr_complex)*vlen), + gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)), + d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(alignment_multiple); +} + +int +gr_multiply_cc::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + gr_complex *out = (gr_complex *) output_items[0]; + int noi = d_vlen*noutput_items; + + memcpy(out, input_items[0], noi*sizeof(gr_complex)); + if(is_unaligned()) { + for(size_t i = 1; i < input_items.size(); i++) + volk_32fc_x2_multiply_32fc_u(out, out, (gr_complex*)input_items[i], noi); + } + else { + for(size_t i = 1; i < input_items.size(); i++) + volk_32fc_x2_multiply_32fc_a(out, out, (gr_complex*)input_items[i], noi); + } + return noutput_items; +} + + + diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.h b/gnuradio-core/src/lib/general/gr_multiply_cc.h new file mode 100644 index 000000000..f80ec8b25 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_cc.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_MULTIPLY_CC_H +#define INCLUDED_GR_MULTIPLY_CC_H + +#include +#include + +class gr_multiply_cc; +typedef boost::shared_ptr gr_multiply_cc_sptr; + +GR_CORE_API gr_multiply_cc_sptr +gr_make_multiply_cc (size_t vlen=1); + +/*! + * \brief Multiply streams of complex values + * \ingroup math_blk + */ + +class GR_CORE_API gr_multiply_cc : public gr_sync_block +{ + private: + friend GR_CORE_API gr_multiply_cc_sptr + gr_make_multiply_cc (size_t vlen); + gr_multiply_cc (size_t vlen); + + size_t d_vlen; + + public: + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_MULTIPLY_CC_H */ diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.i b/gnuradio-core/src/lib/general/gr_multiply_cc.i new file mode 100644 index 000000000..61768c390 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_cc.i @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,multiply_cc) + +gr_multiply_cc_sptr +gr_make_multiply_cc (size_t vlen=1); + +class gr_multiply_cc : public gr_sync_block +{ +public: + +}; diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc new file mode 100644 index 000000000..e301ae8eb --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc @@ -0,0 +1,82 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_multiply_const_cc_sptr +gr_make_multiply_const_cc (gr_complex k, size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_multiply_const_cc (k, vlen)); +} + +gr_multiply_const_cc::gr_multiply_const_cc (gr_complex k, size_t vlen) + : gr_sync_block ("gr_multiply_const_cc", + gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen), + gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)), + d_k(k), d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(alignment_multiple); +} + +gr_complex +gr_multiply_const_cc::k() const +{ + return d_k; +} + +void +gr_multiply_const_cc::set_k(gr_complex k) +{ + d_k = k; +} + +#include + +int +gr_multiply_const_cc::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + const gr_complex *in = (const gr_complex *) input_items[0]; + gr_complex *out = (gr_complex *) output_items[0]; + int noi = d_vlen*noutput_items; + + if(is_unaligned()) { + volk_32fc_s32fc_multiply_32fc_u(out, in, d_k, noi); + } + else { + volk_32fc_s32fc_multiply_32fc_a(out, in, d_k, noi); + } + + return noutput_items; +} + + + diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.h b/gnuradio-core/src/lib/general/gr_multiply_const_cc.h new file mode 100644 index 000000000..1791d9160 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_MULTIPLY_CONST_CC_H +#define INCLUDED_GR_MULTIPLY_CONST_CC_H + +#include +#include + +class gr_multiply_const_cc; +typedef boost::shared_ptr gr_multiply_const_cc_sptr; + +GR_CORE_API gr_multiply_const_cc_sptr +gr_make_multiply_const_cc (gr_complex k, size_t vlen=1); + +/*! + * \brief Multiply stream of complex values with a constant \p k + * \ingroup math_blk + */ + +class GR_CORE_API gr_multiply_const_cc : public gr_sync_block +{ + private: + friend GR_CORE_API gr_multiply_const_cc_sptr + gr_make_multiply_const_cc (gr_complex k, size_t vlen); + gr_multiply_const_cc (gr_complex k, size_t vlen); + + gr_complex d_k; + size_t d_vlen; + + public: + gr_complex k() const; + void set_k(gr_complex k); + + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_MULTIPLY_CONST_CC_H */ diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.i b/gnuradio-core/src/lib/general/gr_multiply_const_cc.i new file mode 100644 index 000000000..be8d32b31 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.i @@ -0,0 +1,33 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,multiply_const_cc) + +gr_multiply_const_cc_sptr +gr_make_multiply_const_cc (gr_complex k, size_t vlen=1); + +class gr_multiply_const_cc : public gr_sync_block +{ +public: + gr_complex k() const; + void set_k(gr_complex k); +}; diff --git a/gnuradio-core/src/lib/gengen/.gitignore b/gnuradio-core/src/lib/gengen/.gitignore index ecd4cb0d5..4422ae0dd 100644 --- a/gnuradio-core/src/lib/gengen/.gitignore +++ b/gnuradio-core/src/lib/gengen/.gitignore @@ -202,12 +202,6 @@ /gr_max_ss.cc /gr_max_ss.h /gr_max_ss.i -/gr_multiply_cc.cc -/gr_multiply_cc.h -/gr_multiply_cc.i -/gr_multiply_const_cc.cc -/gr_multiply_const_cc.h -/gr_multiply_const_cc.i /gr_multiply_const_ff.cc /gr_multiply_const_ff.h /gr_multiply_const_ff.i diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt index a7292f131..53dc9ce15 100644 --- a/gnuradio-core/src/lib/gengen/CMakeLists.txt +++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt @@ -86,10 +86,10 @@ expand_h_cc_i(gr_noise_source_X s i f c) expand_h_cc_i(gr_sig_source_X s i f c) expand_h_cc_i(gr_add_const_XX ss ii ff cc sf) -expand_h_cc_i(gr_multiply_const_XX ss ii ff cc) +expand_h_cc_i(gr_multiply_const_XX ss ii ff) expand_h_cc_i(gr_add_XX ss ii ff cc) expand_h_cc_i(gr_sub_XX ss ii ff cc) -expand_h_cc_i(gr_multiply_XX ss ii ff cc) +expand_h_cc_i(gr_multiply_XX ss ii ff) expand_h_cc_i(gr_divide_XX ss ii ff cc) expand_h_cc_i(gr_mute_XX ss ii ff cc) expand_h_cc_i(gr_add_const_vXX ss ii ff cc) diff --git a/gnuradio-core/src/lib/gengen/Makefile.gen b/gnuradio-core/src/lib/gengen/Makefile.gen index 1c529803c..fb7b21e24 100644 --- a/gnuradio-core/src/lib/gengen/Makefile.gen +++ b/gnuradio-core/src/lib/gengen/Makefile.gen @@ -45,8 +45,6 @@ GENERATED_H = \ gr_moving_average_ff.h \ gr_moving_average_ii.h \ gr_moving_average_ss.h \ - gr_multiply_cc.h \ - gr_multiply_const_cc.h \ gr_multiply_const_ff.h \ gr_multiply_const_ii.h \ gr_multiply_const_ss.h \ @@ -150,8 +148,6 @@ GENERATED_I = \ gr_moving_average_ff.i \ gr_moving_average_ii.i \ gr_moving_average_ss.i \ - gr_multiply_cc.i \ - gr_multiply_const_cc.i \ gr_multiply_const_ff.i \ gr_multiply_const_ii.i \ gr_multiply_const_ss.i \ @@ -255,8 +251,6 @@ GENERATED_CC = \ gr_moving_average_ff.cc \ gr_moving_average_ii.cc \ gr_moving_average_ss.cc \ - gr_multiply_cc.cc \ - gr_multiply_const_cc.cc \ gr_multiply_const_ff.cc \ gr_multiply_const_ii.cc \ gr_multiply_const_ss.cc \ diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py index 9bd6bcc9c..1c2c064c1 100755 --- a/gnuradio-core/src/lib/gengen/generate_common.py +++ b/gnuradio-core/src/lib/gengen/generate_common.py @@ -41,10 +41,8 @@ reg_signatures = ['ss', 'ii', 'ff', 'cc'] reg_roots = [ 'gr_add_const_XX', - 'gr_multiply_const_XX', 'gr_add_XX', 'gr_sub_XX', - 'gr_multiply_XX', 'gr_divide_XX', 'gr_mute_XX', 'gr_add_const_vXX', @@ -66,7 +64,9 @@ others = ( ('gr_sample_and_hold_XX', ('bb','ss','ii','ff')), ('gr_argmax_XX', ('fs','is','ss')), ('gr_max_XX', ('ff','ii','ss')), - ('gr_peak_detector_XX', ('fb','ib','sb')) + ('gr_peak_detector_XX', ('fb','ib','sb')), + ('gr_multiply_XX', ('ss','ii','ff')), + ('gr_multiply_const_XX', ('ss','ii','ff')) ) diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py index 8fb70fb3f..c1d8dafd1 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py @@ -78,6 +78,18 @@ class test_add_and_friends (gr_unittest.TestCase): op = gr.multiply_const_ii (5) self.help_ii ((src_data,), expected_result, op) + def test_mult_const_cc (self): + src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j) + expected_result = (-5-5j, 0+0j, 5+5j, 10+10j, 15+15j) + op = gr.multiply_const_cc (5) + self.help_cc ((src_data,), expected_result, op) + + def test_mult_const_cc2 (self): + src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j) + expected_result = (-3-7j, 0+0j, 3+7j, 6+14j, 9+21j) + op = gr.multiply_const_cc (5+2j) + self.help_cc ((src_data,), expected_result, op) + def test_add_ii (self): src1_data = (1, 2, 3, 4, 5) src2_data = (8, -3, 4, 8, 2) @@ -94,6 +106,14 @@ class test_add_and_friends (gr_unittest.TestCase): self.help_ii ((src1_data, src2_data), expected_result, op) + def test_mult_cc (self): + src1_data = (1+1j, 2+2j, 3+3j, 4+4j, 5+5j) + src2_data = (8, -3, 4, 8, 2) + expected_result = (8+8j, -6-6j, 12+12j, 32+32j, 10+10j) + op = gr.multiply_cc () + self.help_cc ((src1_data, src2_data), + expected_result, op) + def test_sub_ii_1 (self): src1_data = (1, 2, 3, 4, 5) expected_result = (-1, -2, -3, -4, -5) -- cgit From c1927c72bbae3cf6fe96e3d20f3e7bb74469539a Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 16:37:53 -0500 Subject: volk: adding new Volk functions added in this branch to the volk_profile tool and installing it into $prefix/bin. --- volk/apps/CMakeLists.txt | 7 +++++++ volk/apps/volk_profile.cc | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt index f27bdc126..14291e5e3 100644 --- a/volk/apps/CMakeLists.txt +++ b/volk/apps/CMakeLists.txt @@ -42,4 +42,11 @@ add_executable(volk_profile target_link_libraries(volk_profile volk ${Boost_LIBRARIES}) +install( + PROGRAMS + ${CMAKE_BINARY_DIR}/apps/volk_profile + DESTINATION ${GR_RUNTIME_DIR} + COMPONENT "volk" +) + endif(Boost_FOUND AND UNIX) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 10a699872..c198ec42d 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -43,13 +43,18 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 204600, 5000, &results); VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results); VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_index_max_16u_a, 3, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 204600, 100, &results); VOLK_PROFILE(volk_32fc_magnitude_32f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_u, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_magnitude_squared_32f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_magnitude_squared_32f_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results); @@ -103,6 +108,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results); VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results); char path[256]; -- cgit From b3e538493f969526c704a3eb1b3c8d8c61be78f6 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 16:38:36 -0500 Subject: volk: adding new functions to Makefile. --- volk/include/volk/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index c5b99c41b..eea006bf2 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -56,6 +56,7 @@ volkinclude_HEADERS = \ volk_32f_s32f_multiply_32f_a.h \ volk_32fc_32f_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_a.h \ + volk_32fc_s32fc_multiply_32fc_u.h \ volk_32fc_s32f_power_32fc_a.h \ volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \ volk_32fc_s32f_atan2_32f_a.h \ @@ -76,6 +77,7 @@ volkinclude_HEADERS = \ volk_32fc_magnitude_squared_32f_a.h \ volk_32fc_magnitude_squared_32f_u.h \ volk_32fc_x2_multiply_32fc_a.h \ + volk_32fc_x2_multiply_32fc_u.h \ volk_32f_s32f_convert_16i_a.h \ volk_32f_s32f_convert_16i_u.h \ volk_32f_s32f_convert_32i_a.h \ -- cgit From a3b19015cb1c896aef19a7817458878337b3f5e3 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 16:38:56 -0500 Subject: core: more fixes when moving files from gengen to general. --- gnuradio-core/src/guile/tests/gengen_ctors.test | 6 ------ gnuradio-core/src/lib/general/general_generated.i | 4 ---- 2 files changed, 10 deletions(-) diff --git a/gnuradio-core/src/guile/tests/gengen_ctors.test b/gnuradio-core/src/guile/tests/gengen_ctors.test index 6e1213c63..6bac05394 100644 --- a/gnuradio-core/src/guile/tests/gengen_ctors.test +++ b/gnuradio-core/src/guile/tests/gengen_ctors.test @@ -161,12 +161,6 @@ ;;; ./gengen/gr_moving_average_ss.h (pass-if (true? (gr:moving-average-ss 1 0 4096))) -;;; ./gengen/gr_multiply_cc.h -(pass-if (true? (gr:multiply-cc 1))) - -;;; ./gengen/gr_multiply_const_cc.h -(pass-if (true? (gr:multiply-const-cc 1))) - ;;; ./gengen/gr_multiply_const_ff.h (pass-if (true? (gr:multiply-const-ff 1))) diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i index a41f30a3d..847860f53 100644 --- a/gnuradio-core/src/lib/general/general_generated.i +++ b/gnuradio-core/src/lib/general/general_generated.i @@ -29,8 +29,6 @@ #include #include #include -#include -#include #include #include #include @@ -106,8 +104,6 @@ %include %include %include -%include -%include %include %include %include -- cgit From ae663decab658be25ac01072fa2f5c8454bd6167 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 2 Feb 2012 17:26:39 -0500 Subject: core: moving multiply_const_ff from gengen to general to take advantage of volk. Also adds SSE and AVX and unaligned Volk versions for this. --- gnuradio-core/src/lib/general/CMakeLists.txt | 1 + gnuradio-core/src/lib/general/Makefile.am | 9 +++ gnuradio-core/src/lib/general/general.i | 2 + gnuradio-core/src/lib/general/general_generated.i | 1 - gnuradio-core/src/lib/general/gr_int_to_float.cc | 7 +- .../src/lib/general/gr_multiply_const_cc.cc | 2 - .../src/lib/general/gr_multiply_const_ff.cc | 80 ++++++++++++++++++++++ .../src/lib/general/gr_multiply_const_ff.h | 60 ++++++++++++++++ .../src/lib/general/gr_multiply_const_ff.i | 33 +++++++++ gnuradio-core/src/lib/gengen/CMakeLists.txt | 2 +- gnuradio-core/src/lib/gengen/generate_common.py | 2 +- .../src/python/gnuradio/gr/qa_add_and_friends.py | 6 ++ volk/apps/volk_profile.cc | 1 + volk/include/volk/volk_32f_s32f_multiply_32f_a.h | 75 ++++++++++++++++++++ .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 12 ++-- 15 files changed, 276 insertions(+), 17 deletions(-) create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_ff.cc create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_ff.h create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_ff.i diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index 6afdfe27c..393f732b7 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -232,6 +232,7 @@ set(gr_core_general_triple_threats gr_map_bb gr_multiply_cc gr_multiply_const_cc + gr_multiply_const_ff gr_nlog10_ff gr_nop gr_null_sink diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index 65b5a729e..5a6f5bf46 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -94,6 +94,9 @@ libgeneral_la_SOURCES = \ gr_lfsr_32k_source_s.cc \ gr_map_bb.cc \ gr_misc.cc \ + gr_multiply_cc.cc \ + gr_multiply_const_cc.cc \ + gr_multiply_const_ff.cc \ gr_nlog10_ff.cc \ gr_nop.cc \ gr_null_sink.cc \ @@ -249,6 +252,9 @@ grinclude_HEADERS = \ gr_map_bb.h \ gr_math.h \ gr_misc.h \ + gr_multiply_cc.h \ + gr_multiply_const_cc.h \ + gr_multiply_const_ff.h \ gr_nco.h \ gr_nlog10_ff.h \ gr_nop.h \ @@ -408,6 +414,9 @@ swiginclude_HEADERS = \ gr_kludge_copy.i \ gr_lfsr_32k_source_s.i \ gr_map_bb.i \ + gr_multiply_cc.i \ + gr_multiply_const_cc.i \ + gr_multiply_const_ff.i \ gr_nlog10_ff.i \ gr_nop.i \ gr_null_sink.i \ diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i index 7de13258e..ac3fef84c 100644 --- a/gnuradio-core/src/lib/general/general.i +++ b/gnuradio-core/src/lib/general/general.i @@ -108,6 +108,7 @@ #include #include #include +#include #include #include #include @@ -226,6 +227,7 @@ %include "gr_map_bb.i" %include "gr_multiply_cc.i" %include "gr_multiply_const_cc.i" +%include "gr_multiply_const_ff.i" %include "gr_feval.i" %include "gr_pwr_squelch_cc.i" %include "gr_pwr_squelch_ff.i" diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i index 847860f53..52e09f89b 100644 --- a/gnuradio-core/src/lib/general/general_generated.i +++ b/gnuradio-core/src/lib/general/general_generated.i @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.cc b/gnuradio-core/src/lib/general/gr_int_to_float.cc index dca0e1b89..7ec15b1a8 100644 --- a/gnuradio-core/src/lib/general/gr_int_to_float.cc +++ b/gnuradio-core/src/lib/general/gr_int_to_float.cc @@ -26,7 +26,6 @@ #include #include -#include #include gr_int_to_float_sptr @@ -54,17 +53,13 @@ gr_int_to_float::work (int noutput_items, const int32_t *in = (const int32_t *) input_items[0]; float *out = (float *) output_items[0]; -#if 1 if(is_unaligned()) { volk_32i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items); } else { volk_32i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items); } -#else - gri_int_to_float(in, out, d_vlen*noutput_items); -#endif - + return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc index e301ae8eb..59521f54a 100644 --- a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc +++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc @@ -57,8 +57,6 @@ gr_multiply_const_cc::set_k(gr_complex k) d_k = k; } -#include - int gr_multiply_const_cc::work (int noutput_items, gr_vector_const_void_star &input_items, diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc b/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc new file mode 100644 index 000000000..8354cb27b --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc @@ -0,0 +1,80 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_multiply_const_ff_sptr +gr_make_multiply_const_ff (float k, size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_multiply_const_ff (k, vlen)); +} + +gr_multiply_const_ff::gr_multiply_const_ff (float k, size_t vlen) + : gr_sync_block ("gr_multiply_const_ff", + gr_make_io_signature (1, 1, sizeof (float)*vlen), + gr_make_io_signature (1, 1, sizeof (float)*vlen)), + d_k(k), d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); +} + +float +gr_multiply_const_ff::k() const +{ + return d_k; +} + +void +gr_multiply_const_ff::set_k(float k) +{ + d_k = k; +} + +int +gr_multiply_const_ff::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + const float *in = (const float *) input_items[0]; + float *out = (float *) output_items[0]; + int noi = d_vlen*noutput_items; + + if(is_unaligned()) { + volk_32f_s32f_multiply_32f_u(out, in, d_k, noi); + } + else { + volk_32f_s32f_multiply_32f_a(out, in, d_k, noi); + } + + return noutput_items; +} + + + diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.h b/gnuradio-core/src/lib/general/gr_multiply_const_ff.h new file mode 100644 index 000000000..ef42a92f4 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_MULTIPLY_CONST_FF_H +#define INCLUDED_GR_MULTIPLY_CONST_FF_H + +#include +#include + +class gr_multiply_const_ff; +typedef boost::shared_ptr gr_multiply_const_ff_sptr; + +GR_CORE_API gr_multiply_const_ff_sptr +gr_make_multiply_const_ff (float k, size_t vlen=1); + +/*! + * \brief Multiply stream of float values with a constant \p k + * \ingroup math_blk + */ + +class GR_CORE_API gr_multiply_const_ff : public gr_sync_block +{ + private: + friend GR_CORE_API gr_multiply_const_ff_sptr + gr_make_multiply_const_ff (float k, size_t vlen); + gr_multiply_const_ff (float k, size_t vlen); + + float d_k; + size_t d_vlen; + + public: + float k() const; + void set_k(float k); + + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_MULTIPLY_CONST_FF_H */ diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.i b/gnuradio-core/src/lib/general/gr_multiply_const_ff.i new file mode 100644 index 000000000..0fd3b1225 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.i @@ -0,0 +1,33 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,multiply_const_ff) + +gr_multiply_const_ff_sptr +gr_make_multiply_const_ff (float k, size_t vlen=1); + +class gr_multiply_const_ff : public gr_sync_block +{ +public: + float k() const; + void set_k(float k); +}; diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt index 53dc9ce15..83213dc04 100644 --- a/gnuradio-core/src/lib/gengen/CMakeLists.txt +++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt @@ -86,7 +86,7 @@ expand_h_cc_i(gr_noise_source_X s i f c) expand_h_cc_i(gr_sig_source_X s i f c) expand_h_cc_i(gr_add_const_XX ss ii ff cc sf) -expand_h_cc_i(gr_multiply_const_XX ss ii ff) +expand_h_cc_i(gr_multiply_const_XX ss ii) expand_h_cc_i(gr_add_XX ss ii ff cc) expand_h_cc_i(gr_sub_XX ss ii ff cc) expand_h_cc_i(gr_multiply_XX ss ii ff) diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py index 1c2c064c1..5caa7098b 100755 --- a/gnuradio-core/src/lib/gengen/generate_common.py +++ b/gnuradio-core/src/lib/gengen/generate_common.py @@ -66,7 +66,7 @@ others = ( ('gr_max_XX', ('ff','ii','ss')), ('gr_peak_detector_XX', ('fb','ib','sb')), ('gr_multiply_XX', ('ss','ii','ff')), - ('gr_multiply_const_XX', ('ss','ii','ff')) + ('gr_multiply_const_XX', ('ss','ii')) ) diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py index c1d8dafd1..aad57e580 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py @@ -78,6 +78,12 @@ class test_add_and_friends (gr_unittest.TestCase): op = gr.multiply_const_ii (5) self.help_ii ((src_data,), expected_result, op) + def test_mult_const_ff (self): + src_data = (-1, 0, 1, 2, 3) + expected_result = (-5, 0, 5, 10, 15) + op = gr.multiply_const_cc (5) + self.help_cc ((src_data,), expected_result, op) + def test_mult_const_cc (self): src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j) expected_result = (-5-5j, 0+0j, 5+5j, 10+10j, 15+15j) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index c198ec42d..7da8651e9 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -110,6 +110,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 204600, 1000, &results); char path[256]; get_config_path(path); diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h index 37223dc81..d1c6f3f65 100644 --- a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h +++ b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h @@ -4,6 +4,81 @@ #include #include +#ifdef LV_HAVE_SSE +#include +/*! + \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + + __m128 aVal, bVal, cVal; + bVal = _mm_set_ps1(scalar); + for(;number < quarterPoints; number++){ + + aVal = _mm_load_ps(aPtr); + + cVal = _mm_mul_ps(aVal, bVal); + + _mm_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_AVX +#include +/*! + \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + + __m256 aVal, bVal, cVal; + bVal = _mm256_set1_ps(scalar); + for(;number < eighthPoints; number++){ + + aVal = _mm256_load_ps(aPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_AVX */ + + #ifdef LV_HAVE_GENERIC /*! \brief Scalar float multiply diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h index a9dfcda19..450a89066 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h @@ -9,10 +9,10 @@ #ifdef LV_HAVE_SSE3 #include /*! - \brief Multiplies the two input complex vectors and stores their results in the third vector + \brief Multiplies the input vector by a scalar and stores the results in the third vector \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ @@ -53,10 +53,10 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons #ifdef LV_HAVE_GENERIC /*! - \brief Multiplies the two input complex vectors and stores their results in the third vector + \brief Multiplies the input vector by a scalar and stores the results in the third vector \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ -- cgit From 3a21ccb8cdef50daa52f5d26293df3a03c821c99 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 4 Feb 2012 11:03:40 -0500 Subject: sched: some added protections and checks for the alignment states. --- gnuradio-core/src/lib/runtime/gr_block_executor.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc index 11c6639b3..86289695a 100644 --- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc +++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc @@ -183,7 +183,8 @@ gr_block_executor::run_one_iteration() int noutput_items; int max_items_avail; int max_noutput_items = d_max_noutput_items; - int new_alignment; + int new_alignment=0; + int alignment_state=-1; gr_block *m = d_block.get(); gr_block_detail *d = m->detail().get(); @@ -338,6 +339,7 @@ gr_block_executor::run_one_iteration() else { new_alignment = m->unaligned() - noutput_items; } + alignment_state = 0; } else if(noutput_items < m->alignment()) { // if we don't have enough for an aligned call, keep track of @@ -345,11 +347,13 @@ gr_block_executor::run_one_iteration() new_alignment = m->alignment() - noutput_items; m->set_unaligned(new_alignment); m->set_is_unaligned(true); + alignment_state = 1; } else { // enough to round down to the nearest alignment and process. noutput_items = round_down(noutput_items, m->alignment()); m->set_is_unaligned(false); + alignment_state = 2; } } @@ -391,6 +395,12 @@ gr_block_executor::run_one_iteration() goto were_done; } + // If we were made unaligned in this round but return here without + // processing; reset the unalignment claim before next entry. + if(alignment_state == 1) { + m->set_unaligned(0); + m->set_is_unaligned(false); + } return BLKD_IN; } -- cgit From f028a198cb47e0486ad41a29bd3b3dcf0663d766 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 4 Feb 2012 11:04:30 -0500 Subject: volk: new unaligned versions of float multipliers. --- volk/apps/volk_profile.cc | 1 + volk/include/volk/Makefile.am | 2 + volk/include/volk/volk_32f_s32f_multiply_32f_u.h | 102 ++++++++++++++++++++ volk/include/volk/volk_32f_x2_multiply_32f_u.h | 106 +++++++++++++++++++++ .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 24 ++--- volk/lib/testqa.cc | 10 +- 6 files changed, 231 insertions(+), 14 deletions(-) create mode 100644 volk/include/volk/volk_32f_s32f_multiply_32f_u.h create mode 100644 volk/include/volk/volk_32f_x2_multiply_32f_u.h diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 7da8651e9..f5f730df1 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -77,6 +77,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32f_x2_max_32f_a, 1e-4, 0, 204600, 2000, &results); VOLK_PROFILE(volk_32f_x2_min_32f_a, 1e-4, 0, 204600, 2000, &results); VOLK_PROFILE(volk_32f_x2_multiply_32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_u, 1e-4, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_normalize_a, 1e-4, 100, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_power_32f_a, 1e-4, 4, 204600, 100, &results); VOLK_PROFILE(volk_32f_sqrt_32f_a, 1e-4, 0, 204600, 100, &results); diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index eea006bf2..312ff2d5a 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -54,6 +54,7 @@ volkinclude_HEADERS = \ volk_32f_accumulator_s32f_a.h \ volk_32f_x2_add_32f_a.h \ volk_32f_s32f_multiply_32f_a.h \ + volk_32f_s32f_multiply_32f_u.h \ volk_32fc_32f_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_u.h \ @@ -100,6 +101,7 @@ volkinclude_HEADERS = \ volk_32f_x2_max_32f_a.h \ volk_32f_x2_min_32f_a.h \ volk_32f_x2_multiply_32f_a.h \ + volk_32f_x2_multiply_32f_u.h \ volk_32f_s32f_normalize_a.h \ volk_32f_s32f_power_32f_a.h \ volk_32f_sqrt_32f_a.h \ diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_u.h b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h new file mode 100644 index 000000000..0e700060f --- /dev/null +++ b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h @@ -0,0 +1,102 @@ +#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H +#define INCLUDED_volk_32f_s32f_multiply_32f_u_H + +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*! + \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + + __m128 aVal, bVal, cVal; + bVal = _mm_set_ps1(scalar); + for(;number < quarterPoints; number++){ + + aVal = _mm_loadu_ps(aPtr); + + cVal = _mm_mul_ps(aVal, bVal); + + _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_AVX +#include +/*! + \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + + __m256 aVal, bVal, cVal; + bVal = _mm256_set1_ps(scalar); + for(;number < eighthPoints; number++){ + + aVal = _mm256_loadu_ps(aPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * scalar; + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Scalar float multiply + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param scalar the scalar value + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_s32f_multiply_32f_u_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const float* inputPtr = aVector; + float* outputPtr = cVector; + for(number = 0; number < num_points; number++){ + *outputPtr = (*inputPtr) * scalar; + inputPtr++; + outputPtr++; + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */ diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_u.h b/volk/include/volk/volk_32f_x2_multiply_32f_u.h new file mode 100644 index 000000000..6c3ce5d83 --- /dev/null +++ b/volk/include/volk/volk_32f_x2_multiply_32f_u.h @@ -0,0 +1,106 @@ +#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H +#define INCLUDED_volk_32f_x2_multiply_32f_u_H + +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*! + \brief Multiplys the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m128 aVal, bVal, cVal; + for(;number < quarterPoints; number++){ + + aVal = _mm_loadu_ps(aPtr); + bVal = _mm_loadu_ps(bPtr); + + cVal = _mm_mul_ps(aVal, bVal); + + _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + bPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_AVX +#include +/*! + \brief Multiplies the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m256 aVal, bVal, cVal; + for(;number < eighthPoints; number++){ + + aVal = _mm256_loadu_ps(aPtr); + bVal = _mm256_loadu_ps(bPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 8; + bPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Multiplys the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_x2_multiply_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */ diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h index 450a89066..218c450f8 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h @@ -8,13 +8,13 @@ #ifdef LV_HAVE_SSE3 #include - /*! +/*! \brief Multiplies the input vector by a scalar and stores the results in the third vector - \param cVector The vector where the results will be stored - \param aVector The vector to be multiplied - \param scalar The complex scalar to multiply aVector - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector - */ + \param cVector The vector where the results will be stored + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector +*/ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -52,13 +52,13 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons #endif /* LV_HAVE_SSE */ #ifdef LV_HAVE_GENERIC - /*! +/*! \brief Multiplies the input vector by a scalar and stores the results in the third vector - \param cVector The vector where the results will be stored - \param aVector The vector to be multiplied - \param scalar The complex scalar to multiply aVector - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector - */ + \param cVector The vector where the results will be stored + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector +*/ static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index fbd4bdea5..97624775e 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -37,7 +37,6 @@ VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 1); VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 3, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 1); VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 1); VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 1); VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 1); @@ -59,7 +58,6 @@ VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 1); VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 1); VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 1); @@ -90,3 +88,11 @@ VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 1); VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 1); VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1); VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 20460, 1); -- cgit From 47c390286d49e00498a3443a3dcb9f83d11c7ecc Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 4 Feb 2012 11:05:25 -0500 Subject: core: new multiply_const_ff and multiply_ff blocks done using Volk. --- gnuradio-core/src/lib/general/.gitignore | 6 -- gnuradio-core/src/lib/general/CMakeLists.txt | 1 + gnuradio-core/src/lib/general/Makefile.am | 3 + gnuradio-core/src/lib/general/general.i | 2 + gnuradio-core/src/lib/general/general_generated.i | 3 - gnuradio-core/src/lib/general/gr_multiply_ff.cc | 69 ++++++++++++++++++++++ gnuradio-core/src/lib/general/gr_multiply_ff.h | 56 ++++++++++++++++++ gnuradio-core/src/lib/general/gr_multiply_ff.i | 32 ++++++++++ gnuradio-core/src/lib/gengen/.gitignore | 6 -- gnuradio-core/src/lib/gengen/CMakeLists.txt | 2 +- gnuradio-core/src/lib/gengen/generate_common.py | 2 +- .../src/python/gnuradio/gr/qa_add_and_friends.py | 8 +++ 12 files changed, 173 insertions(+), 17 deletions(-) create mode 100644 gnuradio-core/src/lib/general/gr_multiply_ff.cc create mode 100644 gnuradio-core/src/lib/general/gr_multiply_ff.h create mode 100644 gnuradio-core/src/lib/general/gr_multiply_ff.i diff --git a/gnuradio-core/src/lib/general/.gitignore b/gnuradio-core/src/lib/general/.gitignore index 349651e0c..b04ffe4ae 100644 --- a/gnuradio-core/src/lib/general/.gitignore +++ b/gnuradio-core/src/lib/general/.gitignore @@ -158,18 +158,12 @@ /gr_divide_ss.cc /gr_divide_ss.h /gr_divide_ss.i -/gr_multiply_const_ff.cc -/gr_multiply_const_ff.h -/gr_multiply_const_ff.i /gr_multiply_const_ii.cc /gr_multiply_const_ii.h /gr_multiply_const_ii.i /gr_multiply_const_ss.cc /gr_multiply_const_ss.h /gr_multiply_const_ss.i -/gr_multiply_ff.cc -/gr_multiply_ff.h -/gr_multiply_ff.i /gr_multiply_ii.cc /gr_multiply_ii.h /gr_multiply_ii.i diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index 393f732b7..301465361 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -231,6 +231,7 @@ set(gr_core_general_triple_threats gr_lfsr_32k_source_s gr_map_bb gr_multiply_cc + gr_multiply_ff gr_multiply_const_cc gr_multiply_const_ff gr_nlog10_ff diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index 5a6f5bf46..b452a5107 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -95,6 +95,7 @@ libgeneral_la_SOURCES = \ gr_map_bb.cc \ gr_misc.cc \ gr_multiply_cc.cc \ + gr_multiply_ff.cc \ gr_multiply_const_cc.cc \ gr_multiply_const_ff.cc \ gr_nlog10_ff.cc \ @@ -253,6 +254,7 @@ grinclude_HEADERS = \ gr_math.h \ gr_misc.h \ gr_multiply_cc.h \ + gr_multiply_ff.h \ gr_multiply_const_cc.h \ gr_multiply_const_ff.h \ gr_nco.h \ @@ -415,6 +417,7 @@ swiginclude_HEADERS = \ gr_lfsr_32k_source_s.i \ gr_map_bb.i \ gr_multiply_cc.i \ + gr_multiply_ff.i \ gr_multiply_const_cc.i \ gr_multiply_const_ff.i \ gr_nlog10_ff.i \ diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i index ac3fef84c..8e1be02f9 100644 --- a/gnuradio-core/src/lib/general/general.i +++ b/gnuradio-core/src/lib/general/general.i @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -226,6 +227,7 @@ %include "gr_framer_sink_1.i" %include "gr_map_bb.i" %include "gr_multiply_cc.i" +%include "gr_multiply_ff.i" %include "gr_multiply_const_cc.i" %include "gr_multiply_const_ff.i" %include "gr_feval.i" diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i index 52e09f89b..82f9a6006 100644 --- a/gnuradio-core/src/lib/general/general_generated.i +++ b/gnuradio-core/src/lib/general/general_generated.i @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -103,14 +102,12 @@ %include %include %include -%include %include %include %include %include %include %include -%include %include %include %include diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.cc b/gnuradio-core/src/lib/general/gr_multiply_ff.cc new file mode 100644 index 000000000..a7d34ce51 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_ff.cc @@ -0,0 +1,69 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_multiply_ff_sptr +gr_make_multiply_ff (size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_multiply_ff (vlen)); +} + +gr_multiply_ff::gr_multiply_ff (size_t vlen) + : gr_sync_block ("gr_multiply_ff", + gr_make_io_signature (1, -1, sizeof (float)*vlen), + gr_make_io_signature (1, 1, sizeof (float)*vlen)), + d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); +} + +int +gr_multiply_ff::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + float *out = (float *) output_items[0]; + int noi = d_vlen*noutput_items; + + memcpy(out, input_items[0], noi*sizeof(float)); + if(is_unaligned()) { + for(size_t i = 1; i < input_items.size(); i++) + volk_32f_x2_multiply_32f_u(out, out, (const float*)input_items[i], noi); + } + else { + for(size_t i = 1; i < input_items.size(); i++) + volk_32f_x2_multiply_32f_a(out, out, (const float*)input_items[i], noi); + } + return noutput_items; +} + + + diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.h b/gnuradio-core/src/lib/general/gr_multiply_ff.h new file mode 100644 index 000000000..ae36cb1e0 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_ff.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_MULTIPLY_FF_H +#define INCLUDED_GR_MULTIPLY_FF_H + +#include +#include + +class gr_multiply_ff; +typedef boost::shared_ptr gr_multiply_ff_sptr; + +GR_CORE_API gr_multiply_ff_sptr +gr_make_multiply_ff (size_t vlen=1); + +/*! + * \brief Multiply streams of complex values + * \ingroup math_blk + */ + +class GR_CORE_API gr_multiply_ff : public gr_sync_block +{ + private: + friend GR_CORE_API gr_multiply_ff_sptr + gr_make_multiply_ff (size_t vlen); + gr_multiply_ff (size_t vlen); + + size_t d_vlen; + + public: + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_MULTIPLY_FF_H */ diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.i b/gnuradio-core/src/lib/general/gr_multiply_ff.i new file mode 100644 index 000000000..0f06301f2 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_ff.i @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,multiply_ff) + +gr_multiply_ff_sptr +gr_make_multiply_ff (size_t vlen=1); + +class gr_multiply_ff : public gr_sync_block +{ +public: + +}; diff --git a/gnuradio-core/src/lib/gengen/.gitignore b/gnuradio-core/src/lib/gengen/.gitignore index 4422ae0dd..72c915cb8 100644 --- a/gnuradio-core/src/lib/gengen/.gitignore +++ b/gnuradio-core/src/lib/gengen/.gitignore @@ -202,9 +202,6 @@ /gr_max_ss.cc /gr_max_ss.h /gr_max_ss.i -/gr_multiply_const_ff.cc -/gr_multiply_const_ff.h -/gr_multiply_const_ff.i /gr_multiply_const_ii.cc /gr_multiply_const_ii.h /gr_multiply_const_ii.i @@ -223,9 +220,6 @@ /gr_multiply_const_vss.cc /gr_multiply_const_vss.h /gr_multiply_const_vss.i -/gr_multiply_ff.cc -/gr_multiply_ff.h -/gr_multiply_ff.i /gr_multiply_ii.cc /gr_multiply_ii.h /gr_multiply_ii.i diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt index 83213dc04..c3c4a7a35 100644 --- a/gnuradio-core/src/lib/gengen/CMakeLists.txt +++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt @@ -89,7 +89,7 @@ expand_h_cc_i(gr_add_const_XX ss ii ff cc sf) expand_h_cc_i(gr_multiply_const_XX ss ii) expand_h_cc_i(gr_add_XX ss ii ff cc) expand_h_cc_i(gr_sub_XX ss ii ff cc) -expand_h_cc_i(gr_multiply_XX ss ii ff) +expand_h_cc_i(gr_multiply_XX ss ii) expand_h_cc_i(gr_divide_XX ss ii ff cc) expand_h_cc_i(gr_mute_XX ss ii ff cc) expand_h_cc_i(gr_add_const_vXX ss ii ff cc) diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py index 5caa7098b..0c3d4579d 100755 --- a/gnuradio-core/src/lib/gengen/generate_common.py +++ b/gnuradio-core/src/lib/gengen/generate_common.py @@ -65,7 +65,7 @@ others = ( ('gr_argmax_XX', ('fs','is','ss')), ('gr_max_XX', ('ff','ii','ss')), ('gr_peak_detector_XX', ('fb','ib','sb')), - ('gr_multiply_XX', ('ss','ii','ff')), + ('gr_multiply_XX', ('ss','ii')), ('gr_multiply_const_XX', ('ss','ii')) ) diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py index aad57e580..e3b20c3c3 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py @@ -112,6 +112,14 @@ class test_add_and_friends (gr_unittest.TestCase): self.help_ii ((src1_data, src2_data), expected_result, op) + def test_mult_ff (self): + src1_data = (1, 2, 3, 4, 5) + src2_data = (8, -3, 4, 8, 2) + expected_result = (8, -6, 12, 32, 10) + op = gr.multiply_ff () + self.help_ff ((src1_data, src2_data), + expected_result, op) + def test_mult_cc (self): src1_data = (1+1j, 2+2j, 3+3j, 4+4j, 5+5j) src2_data = (8, -3, 4, 8, 2) -- cgit From 298623615b249de459cd12b5507dab921fc3210b Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 6 Feb 2012 10:31:28 -0500 Subject: volk: added unaligned version of adding 2 vectors. --- volk/apps/volk_profile.cc | 1 + volk/include/volk/Makefile.am | 1 + volk/include/volk/volk_32f_x2_add_32f_u.h | 66 +++++++++++++++++++++++++++++++ volk/lib/testqa.cc | 1 + 4 files changed, 69 insertions(+) create mode 100644 volk/include/volk/volk_32f_x2_add_32f_u.h diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index f5f730df1..712c32bce 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -34,6 +34,7 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_16u_byteswap_a, 0, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32f_accumulator_s32f_a, 1e-4, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32f_x2_add_32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_u, 1e-4, 0, 204600, 10000, &results); VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 204600, 50, &results); VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 204600, 1000, &results); diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 312ff2d5a..20864efbe 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -53,6 +53,7 @@ volkinclude_HEADERS = \ volk_16u_byteswap_a.h \ volk_32f_accumulator_s32f_a.h \ volk_32f_x2_add_32f_a.h \ + volk_32f_x2_add_32f_u.h \ volk_32f_s32f_multiply_32f_a.h \ volk_32f_s32f_multiply_32f_u.h \ volk_32fc_32f_multiply_32fc_a.h \ diff --git a/volk/include/volk/volk_32f_x2_add_32f_u.h b/volk/include/volk/volk_32f_x2_add_32f_u.h new file mode 100644 index 000000000..e360a7958 --- /dev/null +++ b/volk/include/volk/volk_32f_x2_add_32f_u.h @@ -0,0 +1,66 @@ +#ifndef INCLUDED_volk_32f_x2_add_32f_u_H +#define INCLUDED_volk_32f_x2_add_32f_u_H + +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*! + \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m128 aVal, bVal, cVal; + for(;number < quarterPoints; number++){ + + aVal = _mm_loadu_ps(aPtr); + bVal = _mm_loadu_ps(bPtr); + + cVal = _mm_add_ps(aVal, bVal); + + _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + bPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) + (*bPtr++); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +static inline void volk_32f_x2_add_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) + (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_32f_x2_add_32f_u_H */ diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 97624775e..b00ea0b64 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -22,6 +22,7 @@ VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 1); VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_add_32f_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1); -- cgit From fb2a84add9706a046b4761021707d6bb97496a2e Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 6 Feb 2012 10:32:18 -0500 Subject: core: added volk version of add_ff. --- gnuradio-core/src/lib/general/.gitignore | 3 - gnuradio-core/src/lib/general/CMakeLists.txt | 1 + gnuradio-core/src/lib/general/Makefile.am | 3 + gnuradio-core/src/lib/general/general.i | 2 + gnuradio-core/src/lib/general/general_generated.i | 2 - gnuradio-core/src/lib/general/gr_add_ff.cc | 70 +++++++++++++++++++++++ gnuradio-core/src/lib/general/gr_add_ff.h | 56 ++++++++++++++++++ gnuradio-core/src/lib/general/gr_add_ff.i | 32 +++++++++++ gnuradio-core/src/lib/gengen/.gitignore | 3 - gnuradio-core/src/lib/gengen/CMakeLists.txt | 2 +- gnuradio-core/src/lib/gengen/generate_common.py | 4 +- 11 files changed, 167 insertions(+), 11 deletions(-) create mode 100644 gnuradio-core/src/lib/general/gr_add_ff.cc create mode 100644 gnuradio-core/src/lib/general/gr_add_ff.h create mode 100644 gnuradio-core/src/lib/general/gr_add_ff.i diff --git a/gnuradio-core/src/lib/general/.gitignore b/gnuradio-core/src/lib/general/.gitignore index b04ffe4ae..795dc793c 100644 --- a/gnuradio-core/src/lib/general/.gitignore +++ b/gnuradio-core/src/lib/general/.gitignore @@ -125,9 +125,6 @@ /gr_add_const_vss.cc /gr_add_const_vss.h /gr_add_const_vss.i -/gr_add_ff.cc -/gr_add_ff.h -/gr_add_ff.i /gr_add_ii.cc /gr_add_ii.h /gr_add_ii.i diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index 301465361..ab35aa66a 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -178,6 +178,7 @@ endif(ENABLE_PYTHON) ######################################################################## set(gr_core_general_triple_threats complex_vec_test + gr_add_ff gr_additive_scrambler_bb gr_agc_cc gr_agc_ff diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index b452a5107..89152d663 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -37,6 +37,7 @@ EXTRA_DIST += \ libgeneral_la_SOURCES = \ complex_vec_test.cc \ + gr_add_ff.cc \ gr_additive_scrambler_bb.cc \ gr_agc_cc.cc \ gr_agc_ff.cc \ @@ -192,6 +193,7 @@ libgeneral_qa_la_SOURCES = \ grinclude_HEADERS = \ gr_core_api.h \ complex_vec_test.h \ + gr_add.h \ gr_additive_scrambler_bb.h \ gr_agc_cc.h \ gr_agc_ff.h \ @@ -364,6 +366,7 @@ noinst_HEADERS = \ swiginclude_HEADERS = \ complex_vec_test.i \ general.i \ + gr_add.i \ gr_additive_scrambler_bb.i \ gr_agc_cc.i \ gr_agc_ff.i \ diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i index 8e1be02f9..c384ecfbb 100644 --- a/gnuradio-core/src/lib/general/general.i +++ b/gnuradio-core/src/lib/general/general.i @@ -140,6 +140,7 @@ #include #include #include +#include %} %include "gri_control_loop.i" @@ -260,3 +261,4 @@ %include "gr_burst_tagger.i" %include "gr_cpm.i" %include "gr_correlate_access_code_tag_bb.i" +%include "gr_add_ff.i" diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i index 82f9a6006..e12f2b0ec 100644 --- a/gnuradio-core/src/lib/general/general_generated.i +++ b/gnuradio-core/src/lib/general/general_generated.i @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -85,7 +84,6 @@ %include %include %include -%include %include %include %include diff --git a/gnuradio-core/src/lib/general/gr_add_ff.cc b/gnuradio-core/src/lib/general/gr_add_ff.cc new file mode 100644 index 000000000..a5db5ec5c --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_add_ff.cc @@ -0,0 +1,70 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_add_ff_sptr +gr_make_add_ff(size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_add_ff(vlen)); +} + +gr_add_ff::gr_add_ff (size_t vlen) + : gr_sync_block("add_ff", + gr_make_io_signature (1, -1, sizeof(float)*vlen), + gr_make_io_signature (1, 1, sizeof(float)*vlen)), + d_vlen (vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(float); + set_alignment(alignment_multiple); +} + +int +gr_add_ff::work(int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + float *out = (float *) output_items[0]; + int noi = d_vlen*noutput_items; + + memcpy(out, input_items[0], noi*sizeof(float)); + for(size_t i = 1; i < input_items.size(); i++) + volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi); + /* + if(is_unaligned()) { + for(size_t i = 1; i < input_items.size(); i++) + volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi); + } + else { + for(size_t i = 1; i < input_items.size(); i++) + volk_32f_x2_add_32f_a(out, out, (const float*)input_items[i], noi); + } + */ + return noutput_items; +} diff --git a/gnuradio-core/src/lib/general/gr_add_ff.h b/gnuradio-core/src/lib/general/gr_add_ff.h new file mode 100644 index 000000000..6421f8da2 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_add_ff.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_ADD_FF_H +#define INCLUDED_GR_ADD_FF_H + +#include +#include + +class gr_add_ff; +typedef boost::shared_ptr gr_add_ff_sptr; + +GR_CORE_API gr_add_ff_sptr +gr_make_add_ff (size_t vlen=1); + +/*! + * \brief Add streams of complex values + * \ingroup math_blk + */ + +class GR_CORE_API gr_add_ff : public gr_sync_block +{ + private: + friend GR_CORE_API gr_add_ff_sptr + gr_make_add_ff (size_t vlen); + gr_add_ff (size_t vlen); + + size_t d_vlen; + + public: + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_ADD_FF_H */ diff --git a/gnuradio-core/src/lib/general/gr_add_ff.i b/gnuradio-core/src/lib/general/gr_add_ff.i new file mode 100644 index 000000000..3c30640b1 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_add_ff.i @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,add_ff) + +gr_add_ff_sptr +gr_make_add_ff (size_t vlen=1); + +class gr_add_ff : public gr_sync_block +{ +public: + +}; diff --git a/gnuradio-core/src/lib/gengen/.gitignore b/gnuradio-core/src/lib/gengen/.gitignore index 72c915cb8..b91d2f197 100644 --- a/gnuradio-core/src/lib/gengen/.gitignore +++ b/gnuradio-core/src/lib/gengen/.gitignore @@ -124,9 +124,6 @@ /gr_add_const_vss.cc /gr_add_const_vss.h /gr_add_const_vss.i -/gr_add_ff.cc -/gr_add_ff.h -/gr_add_ff.i /gr_add_ii.cc /gr_add_ii.h /gr_add_ii.i diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt index c3c4a7a35..98b149e97 100644 --- a/gnuradio-core/src/lib/gengen/CMakeLists.txt +++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt @@ -87,7 +87,7 @@ expand_h_cc_i(gr_sig_source_X s i f c) expand_h_cc_i(gr_add_const_XX ss ii ff cc sf) expand_h_cc_i(gr_multiply_const_XX ss ii) -expand_h_cc_i(gr_add_XX ss ii ff cc) +expand_h_cc_i(gr_add_XX ss ii cc) expand_h_cc_i(gr_sub_XX ss ii ff cc) expand_h_cc_i(gr_multiply_XX ss ii) expand_h_cc_i(gr_divide_XX ss ii ff cc) diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py index 0c3d4579d..616cc4b06 100755 --- a/gnuradio-core/src/lib/gengen/generate_common.py +++ b/gnuradio-core/src/lib/gengen/generate_common.py @@ -41,7 +41,6 @@ reg_signatures = ['ss', 'ii', 'ff', 'cc'] reg_roots = [ 'gr_add_const_XX', - 'gr_add_XX', 'gr_sub_XX', 'gr_divide_XX', 'gr_mute_XX', @@ -66,7 +65,8 @@ others = ( ('gr_max_XX', ('ff','ii','ss')), ('gr_peak_detector_XX', ('fb','ib','sb')), ('gr_multiply_XX', ('ss','ii')), - ('gr_multiply_const_XX', ('ss','ii')) + ('gr_multiply_const_XX', ('ss','ii')), + ('gr_add_XX', ('ss','cc')) ) -- cgit From 69210086c7ae98b93a63a1d810ee28b304a13520 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 6 Feb 2012 22:02:09 -0500 Subject: volk: added a 32fc multiply conjugate kernel. --- volk/apps/volk_profile.cc | 2 + volk/include/volk/Makefile.am | 2 + .../volk/volk_32fc_x2_multiply_conjugate_32fc_a.h | 82 ++++++++++++++++++++++ .../volk/volk_32fc_x2_multiply_conjugate_32fc_u.h | 81 +++++++++++++++++++++ volk/lib/testqa.cc | 2 + 5 files changed, 169 insertions(+) create mode 100644 volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h create mode 100644 volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 712c32bce..6ba7f17bb 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -56,6 +56,8 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_magnitude_squared_32f_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results); diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 20864efbe..d071f18f2 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -59,6 +59,8 @@ volkinclude_HEADERS = \ volk_32fc_32f_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_u.h \ + volk_32fc_s32fc_multiply_conjugate_32fc_a.h \ + volk_32fc_s32fc_multiply_conjugate_32fc_u.h \ volk_32fc_s32f_power_32fc_a.h \ volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \ volk_32fc_s32f_atan2_32f_a.h \ diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h new file mode 100644 index 000000000..70476a8c7 --- /dev/null +++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h @@ -0,0 +1,82 @@ +#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H +#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector First vector to be multiplied + \param bVector Second vector that is conjugated before being multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, y, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + const lv_32fc_t* b = bVector; + + __m128 conjugator = _mm_setr_ps(1, -1, 1, -1); + + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di + + // FIXME: replace with xor for a faster implementation + y = _mm_mul_ps(y, conjugator); // conjugate y + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_store_ps((float*)c,z); // Store the results back into the C container + + a += 2; + b += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * lv_conj(*b); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector First vector to be multiplied + \param bVector Second vector that is conjugated before being multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + const lv_32fc_t* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * lv_conj(*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h new file mode 100644 index 000000000..fbaa29c17 --- /dev/null +++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h @@ -0,0 +1,81 @@ +#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H +#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector First vector to be multiplied + \param bVector Second vector that is conjugated before being multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, y, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + const lv_32fc_t* b = bVector; + + __m128 conjugator = _mm_set_ps(0, 0x80000000, 0, 0x80000000); + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di + + y = _mm_xor_ps(y, conjugator); // conjugate y + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_storeu_ps((float*)c,z); // Store the results back into the C container + + a += 2; + b += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * lv_conj(*b); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector First vector to be multiplied + \param bVector Second vector that is conjugated before being multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_32fc_x2_multiply_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + const lv_32fc_t* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * lv_conj(*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */ diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index b00ea0b64..fdd3d4853 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -91,6 +91,8 @@ VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1); VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1); VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1); -- cgit From f34b496341ceb73baffee6f8bf84ed197ffeeaf0 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 6 Feb 2012 22:02:56 -0500 Subject: core: added Volk-optimized gr_multiply_conjugate_cc at one block with QA code. --- gnuradio-core/src/lib/general/CMakeLists.txt | 1 + gnuradio-core/src/lib/general/Makefile.am | 3 + gnuradio-core/src/lib/general/general.i | 2 + gnuradio-core/src/lib/general/gr_add_ff.cc | 4 -- .../src/lib/general/gr_multiply_conjugate_cc.cc | 69 ++++++++++++++++++++++ .../src/lib/general/gr_multiply_conjugate_cc.h | 57 ++++++++++++++++++ .../src/lib/general/gr_multiply_conjugate_cc.i | 32 ++++++++++ gnuradio-core/src/python/gnuradio/gr/Makefile.am | 1 + .../python/gnuradio/gr/qa_multiply_conjugate.py | 57 ++++++++++++++++++ 9 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc create mode 100644 gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h create mode 100644 gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i create mode 100644 gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt index ab35aa66a..2bc639cd3 100644 --- a/gnuradio-core/src/lib/general/CMakeLists.txt +++ b/gnuradio-core/src/lib/general/CMakeLists.txt @@ -235,6 +235,7 @@ set(gr_core_general_triple_threats gr_multiply_ff gr_multiply_const_cc gr_multiply_const_ff + gr_multiply_conjugate_cc gr_nlog10_ff gr_nop gr_null_sink diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index 89152d663..ea3b31fd3 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -99,6 +99,7 @@ libgeneral_la_SOURCES = \ gr_multiply_ff.cc \ gr_multiply_const_cc.cc \ gr_multiply_const_ff.cc \ + gr_multiply_conjugate_cc.cc \ gr_nlog10_ff.cc \ gr_nop.cc \ gr_null_sink.cc \ @@ -259,6 +260,7 @@ grinclude_HEADERS = \ gr_multiply_ff.h \ gr_multiply_const_cc.h \ gr_multiply_const_ff.h \ + gr_multiply_conjugate_cc.h \ gr_nco.h \ gr_nlog10_ff.h \ gr_nop.h \ @@ -423,6 +425,7 @@ swiginclude_HEADERS = \ gr_multiply_ff.i \ gr_multiply_const_cc.i \ gr_multiply_const_ff.i \ + gr_multiply_conjugate_cc.i \ gr_nlog10_ff.i \ gr_nop.i \ gr_null_sink.i \ diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i index c384ecfbb..89738b01a 100644 --- a/gnuradio-core/src/lib/general/general.i +++ b/gnuradio-core/src/lib/general/general.i @@ -110,6 +110,7 @@ #include #include #include +#include #include #include #include @@ -231,6 +232,7 @@ %include "gr_multiply_ff.i" %include "gr_multiply_const_cc.i" %include "gr_multiply_const_ff.i" +%include "gr_multiply_conjugate_cc.i" %include "gr_feval.i" %include "gr_pwr_squelch_cc.i" %include "gr_pwr_squelch_ff.i" diff --git a/gnuradio-core/src/lib/general/gr_add_ff.cc b/gnuradio-core/src/lib/general/gr_add_ff.cc index a5db5ec5c..fc5455c98 100644 --- a/gnuradio-core/src/lib/general/gr_add_ff.cc +++ b/gnuradio-core/src/lib/general/gr_add_ff.cc @@ -54,9 +54,6 @@ gr_add_ff::work(int noutput_items, int noi = d_vlen*noutput_items; memcpy(out, input_items[0], noi*sizeof(float)); - for(size_t i = 1; i < input_items.size(); i++) - volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi); - /* if(is_unaligned()) { for(size_t i = 1; i < input_items.size(); i++) volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi); @@ -65,6 +62,5 @@ gr_add_ff::work(int noutput_items, for(size_t i = 1; i < input_items.size(); i++) volk_32f_x2_add_32f_a(out, out, (const float*)input_items[i], noi); } - */ return noutput_items; } diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc new file mode 100644 index 000000000..103d87b8b --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc @@ -0,0 +1,69 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +gr_multiply_conjugate_cc_sptr +gr_make_multiply_conjugate_cc (size_t vlen) +{ + return gnuradio::get_initial_sptr(new gr_multiply_conjugate_cc (vlen)); +} + +gr_multiply_conjugate_cc::gr_multiply_conjugate_cc (size_t vlen) + : gr_sync_block ("gr_multiply_conjugate_cc", + gr_make_io_signature (2, 2, sizeof (gr_complex)*vlen), + gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)), + d_vlen(vlen) +{ + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(alignment_multiple); +} + +int +gr_multiply_conjugate_cc::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + gr_complex *in0 = (gr_complex *) input_items[0]; + gr_complex *in1 = (gr_complex *) input_items[1]; + gr_complex *out = (gr_complex *) output_items[0]; + int noi = d_vlen*noutput_items; + + if(is_unaligned()) { + volk_32fc_x2_multiply_conjugate_32fc_u(out, in0, in1, noi); + } + else { + volk_32fc_x2_multiply_conjugate_32fc_a(out, in0, in1, noi); + } + + return noutput_items; +} + + + diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h new file mode 100644 index 000000000..eb032f31b --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h @@ -0,0 +1,57 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H +#define INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H + +#include +#include + +class gr_multiply_conjugate_cc; +typedef boost::shared_ptr +gr_multiply_conjugate_cc_sptr; + +GR_CORE_API gr_multiply_conjugate_cc_sptr +gr_make_multiply_conjugate_cc (size_t vlen=1); + +/*! + * \brief Multiplies a stream by the conjugate of the second stream + * \ingroup math_blk + */ + +class GR_CORE_API gr_multiply_conjugate_cc : public gr_sync_block +{ + private: + friend GR_CORE_API gr_multiply_conjugate_cc_sptr + gr_make_multiply_conjugate_cc (size_t vlen); + gr_multiply_conjugate_cc (size_t vlen); + + size_t d_vlen; + + public: + virtual int work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items); +}; + + +#endif /* INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H */ diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i new file mode 100644 index 000000000..023410505 --- /dev/null +++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +GR_SWIG_BLOCK_MAGIC(gr,multiply_conjugate_cc) + +gr_multiply_conjugate_cc_sptr +gr_make_multiply_conjugate_cc (size_t vlen=1); + +class gr_multiply_conjugate_cc : public gr_sync_block +{ +public: + +}; diff --git a/gnuradio-core/src/python/gnuradio/gr/Makefile.am b/gnuradio-core/src/python/gnuradio/gr/Makefile.am index 16dd14790..3c9edcf5b 100644 --- a/gnuradio-core/src/python/gnuradio/gr/Makefile.am +++ b/gnuradio-core/src/python/gnuradio/gr/Makefile.am @@ -79,6 +79,7 @@ noinst_PYTHON = \ qa_kludged_imports.py \ qa_max.py \ qa_message.py \ + qa_multiply_conjugate.py \ qa_mute.py \ qa_nlog10.py \ qa_noise.py \ diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py b/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py new file mode 100644 index 000000000..aaf3cc125 --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest + +class test_multiply_conjugate (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_000 (self): + src_data0 = (-2-2j, -1-1j, -2+2j, -1+1j, + 2-2j, 1-1j, 2+2j, 1+1j, + 0+0j) + src_data1 = (-3-3j, -4-4j, -3+3j, -4+4j, + 3-3j, 4-4j, 3+3j, 4+4j, + 0+0j) + + exp_data = (12+0j, 8+0j, 12+0j, 8+0j, + 12+0j, 8+0j, 12+0j, 8+0j, + 0+0j) + src0 = gr.vector_source_c(src_data0) + src1 = gr.vector_source_c(src_data1) + op = gr.multiply_conjugate_cc () + dst = gr.vector_sink_c () + + self.tb.connect(src0, (op,0)) + self.tb.connect(src1, (op,1)) + self.tb.connect(op, dst) + self.tb.run() + result_data = dst.data () + self.assertEqual (exp_data, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_multiply_conjugate, "test_multiply_conjugate.xml") -- cgit From cdb328758dca9fa494956c0e62f5e78adf613982 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Feb 2012 16:59:50 -0500 Subject: volk: fixed complex multiply and conjugate kernel to use xor for conjugation. --- volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h | 5 ++--- volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h index 70476a8c7..2a1bcbce0 100644 --- a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h +++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h @@ -24,15 +24,14 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVecto const lv_32fc_t* a = aVector; const lv_32fc_t* b = bVector; - __m128 conjugator = _mm_setr_ps(1, -1, 1, -1); + __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); for(;number < halfPoints; number++){ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di - // FIXME: replace with xor for a faster implementation - y = _mm_mul_ps(y, conjugator); // conjugate y + y = _mm_xor_ps(y, conjugator); // conjugate y yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h index fbaa29c17..92f6a051e 100644 --- a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h +++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h @@ -24,7 +24,7 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVecto const lv_32fc_t* a = aVector; const lv_32fc_t* b = bVector; - __m128 conjugator = _mm_set_ps(0, 0x80000000, 0, 0x80000000); + __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); for(;number < halfPoints; number++){ -- cgit From 3080cd75a6a10aab757e1e02fb99e81e2f3724d5 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Feb 2012 17:23:44 -0500 Subject: volk: adding complex conjugate kernel. --- volk/apps/volk_profile.cc | 2 + volk/include/volk/Makefile.am | 5 +- volk/include/volk/volk_32fc_conjugate_32fc_a.h | 64 ++++++++++++++++++++++++++ volk/include/volk/volk_32fc_conjugate_32fc_u.h | 64 ++++++++++++++++++++++++++ volk/lib/testqa.cc | 2 + 5 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 volk/include/volk/volk_32fc_conjugate_32fc_a.h create mode 100644 volk/include/volk/volk_32fc_conjugate_32fc_u.h diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 6ba7f17bb..0da21ffa5 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -58,6 +58,8 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results); VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results); diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index d071f18f2..f6b5835b1 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -134,4 +134,7 @@ volkinclude_HEADERS = \ volk_8i_convert_16i_a.h \ volk_8i_convert_16i_u.h \ volk_8i_s32f_convert_32f_a.h \ - volk_8i_s32f_convert_32f_u.h + volk_8i_s32f_convert_32f_u.h \ + volk_32fc_conjugate_32fc_a.h \ + volk_32fc_conjugate_32fc_u.h + diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_conjugate_32fc_a.h new file mode 100644 index 000000000..1518af9be --- /dev/null +++ b/volk/include/volk/volk_32fc_conjugate_32fc_a.h @@ -0,0 +1,64 @@ +#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H +#define INCLUDED_volk_32fc_conjugate_32fc_a_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Takes the conjugate of a complex vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); + + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi + + x = _mm_xor_ps(x, conjugator); // conjugate register + + _mm_store_ps((float*)c,x); // Store the results back into the C container + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = lv_conj(*a); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Takes the conjugate of a complex vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = lv_conj(*aPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_conjugate_32fc_u.h new file mode 100644 index 000000000..b26fe0789 --- /dev/null +++ b/volk/include/volk/volk_32fc_conjugate_32fc_u.h @@ -0,0 +1,64 @@ +#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H +#define INCLUDED_volk_32fc_conjugate_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Takes the conjugate of a complex vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi + + x = _mm_xor_ps(x, conjugator); // conjugate register + + _mm_storeu_ps((float*)c,x); // Store the results back into the C container + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = lv_conj(*a); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Takes the conjugate of a complex vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_32fc_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = lv_conj(*aPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */ diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index fdd3d4853..593087f85 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -93,6 +93,8 @@ VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_a, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1); VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1); -- cgit From 75bb99df4720789749c059a0207507a3cbdd3855 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Feb 2012 17:49:36 -0500 Subject: core: using volk for conjugate block and added QA code for it. --- gnuradio-core/src/lib/general/gr_conjugate_cc.cc | 27 ++++------- gnuradio-core/src/python/gnuradio/gr/Makefile.am | 1 + .../src/python/gnuradio/gr/qa_conjugate.py | 53 ++++++++++++++++++++++ 3 files changed, 62 insertions(+), 19 deletions(-) create mode 100644 gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py diff --git a/gnuradio-core/src/lib/general/gr_conjugate_cc.cc b/gnuradio-core/src/lib/general/gr_conjugate_cc.cc index 59c3bae89..d2b20ffe6 100644 --- a/gnuradio-core/src/lib/general/gr_conjugate_cc.cc +++ b/gnuradio-core/src/lib/general/gr_conjugate_cc.cc @@ -28,6 +28,7 @@ #include #include +#include gr_conjugate_cc_sptr gr_make_conjugate_cc () @@ -40,6 +41,9 @@ gr_conjugate_cc::gr_conjugate_cc () gr_make_io_signature (1, 1, sizeof (gr_complex)), gr_make_io_signature (1, 1, sizeof (gr_complex))) { + const int alignment_multiple = + volk_get_alignment() / sizeof(gr_complex); + set_alignment(alignment_multiple); } int @@ -50,26 +54,11 @@ gr_conjugate_cc::work (int noutput_items, gr_complex *iptr = (gr_complex *) input_items[0]; gr_complex *optr = (gr_complex *) output_items[0]; - int size = noutput_items; - - while (size >= 8){ - optr[0] = conj(iptr[0]); - optr[1] = conj(iptr[1]); - optr[2] = conj(iptr[2]); - optr[3] = conj(iptr[3]); - optr[4] = conj(iptr[4]); - optr[5] = conj(iptr[5]); - optr[6] = conj(iptr[6]); - optr[7] = conj(iptr[7]); - size -= 8; - optr += 8; - iptr += 8; + if(is_unaligned()) { + volk_32fc_conjugate_32fc_u(optr, iptr, noutput_items); } - - while (size-- > 0) { - *optr = conj(*iptr); - iptr++; - optr++; + else { + volk_32fc_conjugate_32fc_a(optr, iptr, noutput_items); } return noutput_items; diff --git a/gnuradio-core/src/python/gnuradio/gr/Makefile.am b/gnuradio-core/src/python/gnuradio/gr/Makefile.am index 3c9edcf5b..9853766f9 100644 --- a/gnuradio-core/src/python/gnuradio/gr/Makefile.am +++ b/gnuradio-core/src/python/gnuradio/gr/Makefile.am @@ -50,6 +50,7 @@ noinst_PYTHON = \ qa_bin_statistics.py \ qa_classify.py \ qa_complex_to_xxx.py \ + qa_conjugate.py \ qa_copy.py \ qa_delay.py \ qa_dc_blocker.py \ diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py b/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py new file mode 100644 index 000000000..c07902a5a --- /dev/null +++ b/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from gnuradio import gr, gr_unittest + +class test_conjugate (gr_unittest.TestCase): + + def setUp (self): + self.tb = gr.top_block () + + def tearDown (self): + self.tb = None + + def test_000 (self): + src_data = (-2-2j, -1-1j, -2+2j, -1+1j, + 2-2j, 1-1j, 2+2j, 1+1j, + 0+0j) + + exp_data = (-2+2j, -1+1j, -2-2j, -1-1j, + 2+2j, 1+1j, 2-2j, 1-1j, + 0-0j) + + src = gr.vector_source_c(src_data) + op = gr.conjugate_cc () + dst = gr.vector_sink_c () + + self.tb.connect(src, op) + self.tb.connect(op, dst) + self.tb.run() + result_data = dst.data () + self.assertEqual (exp_data, result_data) + +if __name__ == '__main__': + gr_unittest.run(test_conjugate, "test_conjugate.xml") -- cgit From 786058aacbe0ca662e14ea5f00f1c0872a599577 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Feb 2012 18:32:09 -0500 Subject: volk: adding an examples directory with scripts to benchmark and compare volk-optimized GR blocks. --- .../python/volk_benchmark/volk_math.py | 146 ++++++++++++++++++ .../python/volk_benchmark/volk_plot.py | 81 ++++++++++ .../python/volk_benchmark/volk_test_funcs.py | 171 +++++++++++++++++++++ 3 files changed, 398 insertions(+) create mode 100755 gnuradio-examples/python/volk_benchmark/volk_math.py create mode 100755 gnuradio-examples/python/volk_benchmark/volk_plot.py create mode 100644 gnuradio-examples/python/volk_benchmark/volk_test_funcs.py diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py new file mode 100755 index 000000000..ec85ce0ad --- /dev/null +++ b/gnuradio-examples/python/volk_benchmark/volk_math.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python + +from gnuradio import gr +import argparse +from volk_test_funcs import * + +def multiply_const_cc(N): + k = 3.3 + op = gr.multiply_const_cc(k) + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 1, 1) + return tb + +###################################################################### + +def multiply_const_ff(N): + k = 3.3 + op = gr.multiply_const_ff(k) + tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def multiply_cc(N): + op = gr.multiply_cc() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) + return tb + +###################################################################### + +def multiply_ff(N): + op = gr.multiply_ff() + tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 2, 1) + return tb + +###################################################################### + +def add_ff(N): + op = gr.add_ff() + tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 2, 1) + return tb + +###################################################################### + +def conjugate_cc(N): + op = gr.conjugate_cc() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 1, 1) + return tb + +###################################################################### + +def multiply_conjugate_cc_volk(N): + op = gr.multiply_conjugate_cc() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) + return tb + +def multiply_conjugate_cc_nonvolk(N): + class s(gr.hier_block2): + def __init__(self): + gr.hier_block2.__init__(self, "s", + gr.io_signature(2, 2, gr.sizeof_gr_complex), + gr.io_signature(1, 1, gr.sizeof_gr_complex)) + conj = gr.conjugate_cc() + mult = gr.multiply_cc() + self.connect((self,0), (mult,0)) + self.connect((self,1), conj, (mult,1)) + self.connect(mult, self) + + op = s() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) + return tb + +#multiply_conjugate_cc = multiply_conjugate_cc_volk +multiply_conjugate_cc = multiply_conjugate_cc_nonvolk + +###################################################################### + +def run_tests(func, N, iters): + print("Running Test: {0}".format(func.__name__)) + tb = func(N) + t = timeit(tb, iters) + res = format_results(func.__name__, t) + return res + +def main(): + avail_tests = [multiply_const_cc, + multiply_const_ff, + multiply_cc, + multiply_ff, + add_ff, + conjugate_cc, + multiply_conjugate_cc] + + desc='Time an operation to compare with other implementations. \ + This program runs a simple GNU Radio flowgraph to test a \ + particular math function, mostly to compare the \ + Volk-optimized implementation versus a regular \ + implementation. The results are stored to an SQLite database \ + that can then be read by volk_plot.py to plot the differences.' + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('label', type=str, + default=None, + help='Label of database table [default: %(default)s]') + parser.add_argument('-D', '--database', type=str, + default="volk_results.db", + help='Database file to store data in [default: %(default)s]') + parser.add_argument('-N', '--nitems', type=float, + default=1e9, + help='Number of items per iterations [default: %(default)s]') + parser.add_argument('-I', '--iterations', type=int, + default=20, + help='Number of iterations [default: %(default)s]') + parser.add_argument('--test', type=int, + choices=xrange(len(avail_tests)), + help='Test to run') + parser.add_argument('--list', action='store_true', + help='List the available tests') + parser.add_argument('--all', action='store_true', + help='Run all tests') + args = parser.parse_args() + + if(args.list): + print "Available Tests to Run:" + print "\n".join(["\t{0}: {1}".format(i,f.__name__) for i,f in enumerate(avail_tests)]) + sys.exit(0) + + N = int(args.nitems) + iters = args.iterations + label = args.label + + conn = create_connection(args.database) + new_table(conn, label) + + if not args.all: + func = avail_tests[args.test] + res = run_tests(func, N, iters) + replace_results(conn, label, N, iters, res) + else: + for f in avail_tests: + res = run_tests(f, N, iters) + replace_results(conn, label, N, iters, res) + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + pass diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py new file mode 100755 index 000000000..665df5e14 --- /dev/null +++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +import sys +import argparse +from volk_test_funcs import * + +try: + import matplotlib + import matplotlib.pyplot as plt +except ImportError: + sys.stderr.write("Could not import Matplotlib (http://matplotlib.sourceforge.net/)\n") + sys.exit(1) + +def main(): + desc='Plot Volk performance results from a SQLite database. ' + \ + 'Run one of the volk tests first (e.g, volk_math.py)' + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('-D', '--database', type=str, + default="volk_results.db", + help='Database file to read data from [default: %(default)s]') + args = parser.parse_args() + + # Set up global plotting properties + matplotlib.rcParams['figure.subplot.bottom'] = 0.2 + matplotlib.rcParams['figure.subplot.top'] = 0.95 + matplotlib.rcParams['ytick.labelsize'] = 16 + matplotlib.rcParams['xtick.labelsize'] = 16 + matplotlib.rcParams['legend.fontsize'] = 18 + + # Get list of tables to compare + conn = create_connection(args.database) + tables = list_tables(conn) + M = len(tables) + + # width of bars depends on number of comparisons + wdth = 0.80/M + + colors = ['b', 'r', 'g', 'm', 'k'] + + # Set up figure for plotting + f0 = plt.figure(0, facecolor='w', figsize=(14,10)) + s0 = f0.add_subplot(1,1,1) + + for i,table in enumerate(tables): + # Get results from the next table + res = get_results(conn, table[0]) + + xlabels = [] + averages = [] + variances = [] + maxes = [] + mins = [] + for r in res: + xlabels.append(r['kernel']) + averages.append(r['avg']) + variances.append(r['var']) + maxes.append(r['max']) + mins.append(r['min']) + + # makes x values for this data set placement + x0 = xrange(len(res)) + x1 = [x + i*wdth for x in x0] + + s0.bar(x1, averages, width=wdth, + #yerr=variances, + color=colors[i%M], label=table[0], + edgecolor='k', linewidth=2) + + s0.legend() + s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']), + fontsize=22, fontweight='bold') + s0.set_xticks(x0) + s0.set_xticklabels(xlabels) + for label in s0.xaxis.get_ticklabels(): + label.set_rotation(45) + label.set_fontsize(16) + + plt.show() + +if __name__ == "__main__": + main() diff --git a/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py b/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py new file mode 100644 index 000000000..4f4e4afd3 --- /dev/null +++ b/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +from gnuradio import gr +import math, sys, os, time + +try: + import scipy +except ImportError: + sys.stderr.write("Unable to import Scipy (www.scipy.org)\n") + sys.exit(1) + +try: + import sqlite3 +except ImportError: + sys.stderr.write("Unable to import sqlite3: requires Python 2.5\n") + sys.exit(1) + +def execute(conn, cmd): + ''' + Executes the command cmd to the database opened in connection conn. + ''' + c = conn.cursor() + c.execute(cmd) + conn.commit() + c.close() + +def create_connection(database): + ''' + Returns a connection object to the SQLite database. + ''' + return sqlite3.connect(database) + +def new_table(conn, tablename): + ''' + Create a new table for results. + All results are in the form: [kernel | nitems | iters | avg. time | variance | max time | min time ] + Each table is meant as a different setting (e.g., volk_aligned, volk_unaligned, etc.) + ''' + cols = "kernel text, nitems int, iters int, avg real, var real, max real, min real" + cmd = "create table if not exists {0} ({1})".format( + tablename, cols) + execute(conn, cmd) + +def replace_results(conn, tablename, nitems, iters, res): + ''' + Inserts or replaces the results 'res' dictionary values into the table. + This deletes all old entries of the kernel in this table. + ''' + cmd = "DELETE FROM {0} where kernel='{1}'".format(tablename, res["kernel"]) + execute(conn, cmd) + insert_results(conn, tablename, nitems, iters, res) + +def insert_results(conn, tablename, nitems, iters, res): + ''' + Inserts the results dictionary values into the table. + ''' + cols = "kernel, nitems, iters, avg, var, max, min" + cmd = "INSERT INTO {0} ({1}) VALUES ('{2}', {3}, {4}, {5}, {6}, {7}, {8})".format( + tablename, cols, res["kernel"], nitems, iters, + res["avg"], res["var"], res["max"], res["min"]) + execute(conn, cmd) + +def list_tables(conn): + ''' + Returns a list of all tables in the database. + ''' + cmd = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" + c = conn.cursor() + c.execute(cmd) + t = c.fetchall() + c.close() + + return t + +def get_results(conn, tablename): + ''' + Gets all results in tablename. + ''' + cmd = "SELECT * FROM {0}".format(tablename) + c = conn.cursor() + c.execute(cmd) + fetched = c.fetchall() + c.close() + + res = list() + for f in fetched: + r = dict() + r['kernel'] = f[0] + r['nitems'] = f[1] + r['iters'] = f[2] + r['avg'] = f[3] + r['var'] = f[4] + r['min'] = f[5] + r['max'] = f[6] + res.append(r) + + return res + + +class helper(gr.top_block): + ''' + Helper function to run the tests. The parameters are: + N: number of items to process (int) + op: The GR block/hier_block to test + isizeof: the sizeof the input type + osizeof: the sizeof the output type + nsrcs: number of inputs to the op + nsnks: number of outputs of the op + + This function can only handle blocks where all inputs are the same + datatype and all outputs are the same data type + ''' + def __init__(self, N, op, + isizeof=gr.sizeof_gr_complex, + osizeof=gr.sizeof_gr_complex, + nsrcs=1, nsnks=1): + gr.top_block.__init__(self, "helper") + + self.op = op + self.srcs = [] + self.snks = [] + self.head = gr.head(isizeof, N) + + for n in xrange(nsrcs): + self.srcs.append(gr.null_source(isizeof)) + + for n in xrange(nsnks): + self.snks.append(gr.null_sink(osizeof)) + + self.connect(self.srcs[0], self.head, (self.op,0)) + + for n in xrange(1, nsrcs): + self.connect(self.srcs[n], (self.op,n)) + + for n in xrange(nsnks): + self.connect((self.op,n), self.snks[n]) + +def timeit(tb, iterations): + ''' + Given a top block, this function times it for a number of + iterations and stores the time in a list that is returned. + ''' + r = gr.enable_realtime_scheduling() + if r != gr.RT_OK: + print "Warning: failed to enable realtime scheduling" + + times = [] + for i in xrange(iterations): + start_time = time.time() + tb.run() + end_time = time.time() + tb.head.reset() + + times.append(end_time - start_time) + + return times + +def format_results(kernel, times): + ''' + Convinience function to convert the results of the timeit function + into a dictionary. + ''' + res = dict() + res["kernel"] = kernel + res["avg"] = scipy.mean(times) + res["var"] = scipy.var(times) + res["max"] = max(times) + res["min"] = min(times) + return res + + -- cgit From 5a07519b2685fabab3e75380657a53d2161dc1a2 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Feb 2012 22:17:28 -0500 Subject: core: fixed alignment call for char_to_short. --- gnuradio-core/src/lib/general/gr_char_to_short.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc index 40ffa9338..13375412c 100644 --- a/gnuradio-core/src/lib/general/gr_char_to_short.cc +++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc @@ -41,7 +41,7 @@ gr_char_to_short::gr_char_to_short (size_t vlen) d_vlen(vlen) { const int alignment_multiple = - volk_get_alignment() / sizeof(float); + volk_get_alignment() / sizeof(short); set_alignment(alignment_multiple); } -- cgit From 4c048e77d0f7f78cd684534133a9312be936fcc6 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Feb 2012 22:18:00 -0500 Subject: volk: test commands for measuring type conversion performance. --- .../python/volk_benchmark/volk_types.py | 183 +++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100755 gnuradio-examples/python/volk_benchmark/volk_types.py diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py new file mode 100755 index 000000000..3dd10ae96 --- /dev/null +++ b/gnuradio-examples/python/volk_benchmark/volk_types.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python + +from gnuradio import gr +import argparse +from volk_test_funcs import * + +###################################################################### + +def float_to_char(N): + op = gr.float_to_char() + tb = helper(N, op, gr.sizeof_float, gr.sizeof_char, 1, 1) + return tb + +###################################################################### + +def float_to_int(N): + op = gr.float_to_int() + tb = helper(N, op, gr.sizeof_float, gr.sizeof_int, 1, 1) + return tb + +###################################################################### + +def float_to_short(N): + op = gr.float_to_short() + tb = helper(N, op, gr.sizeof_float, gr.sizeof_short, 1, 1) + return tb + +###################################################################### + +def short_to_float(N): + op = gr.short_to_float() + tb = helper(N, op, gr.sizeof_short, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def short_to_char(N): + op = gr.short_to_char() + tb = helper(N, op, gr.sizeof_short, gr.sizeof_char, 1, 1) + return tb + +###################################################################### + +def char_to_short(N): + op = gr.char_to_short() + tb = helper(N, op, gr.sizeof_char, gr.sizeof_short, 1, 1) + return tb + +###################################################################### + +def char_to_float(N): + op = gr.char_to_float() + tb = helper(N, op, gr.sizeof_char, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def int_to_float(N): + op = gr.int_to_float() + tb = helper(N, op, gr.sizeof_int, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def complex_to_float(N): + op = gr.complex_to_float() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 2) + return tb + +###################################################################### + +def complex_to_real(N): + op = gr.complex_to_real() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def complex_to_imag(N): + op = gr.complex_to_imag() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def complex_to_mag(N): + op = gr.complex_to_mag() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def complex_to_mag_squared(N): + op = gr.complex_to_mag_squared() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def complex_to_arg(N): + op = gr.complex_to_arg() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1) + return tb + +###################################################################### + +def run_tests(func, N, iters): + print("Running Test: {0}".format(func.__name__)) + tb = func(N) + t = timeit(tb, iters) + res = format_results(func.__name__, t) + return res + +def main(): + avail_tests = [float_to_char, + float_to_int, + float_to_short, + short_to_float, + short_to_char, + char_to_short, + char_to_float, + int_to_float, + complex_to_float, + complex_to_real, + complex_to_imag, + complex_to_mag, + complex_to_mag_squared, + complex_to_arg] + + desc='Time an operation to compare with other implementations. \ + This program runs a simple GNU Radio flowgraph to test a \ + particular math function, mostly to compare the \ + Volk-optimized implementation versus a regular \ + implementation. The results are stored to an SQLite database \ + that can then be read by volk_plot.py to plot the differences.' + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('label', type=str, + default=None, + help='Label of database table [default: %(default)s]') + parser.add_argument('-D', '--database', type=str, + default="volk_results.db", + help='Database file to store data in [default: %(default)s]') + parser.add_argument('-N', '--nitems', type=float, + default=1e9, + help='Number of items per iterations [default: %(default)s]') + parser.add_argument('-I', '--iterations', type=int, + default=20, + help='Number of iterations [default: %(default)s]') + parser.add_argument('--test', type=int, + choices=xrange(len(avail_tests)), + help='Test to run') + parser.add_argument('--list', action='store_true', + help='List the available tests') + parser.add_argument('--all', action='store_true', + help='Run all tests') + args = parser.parse_args() + + if(args.list): + print "Available Tests to Run:" + print "\n".join(["\t{0}: {1}".format(i,f.__name__) for i,f in enumerate(avail_tests)]) + sys.exit(0) + + N = int(args.nitems) + iters = args.iterations + label = args.label + + conn = create_connection(args.database) + new_table(conn, label) + + if not args.all: + func = avail_tests[args.test] + res = run_tests(func, N, iters) + replace_results(conn, label, N, iters, res) + else: + for f in avail_tests: + res = run_tests(f, N, iters) + replace_results(conn, label, N, iters, res) + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + pass -- cgit From a9a2c632040d37562a64eb81ed7d4f136a7a774e Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 9 Feb 2012 20:49:44 -0500 Subject: volk: improved GR benchmark and plotting utilities. --- .../python/volk_benchmark/volk_math.py | 61 ++++++++-------- .../python/volk_benchmark/volk_plot.py | 84 +++++++++++++++++----- .../python/volk_benchmark/volk_types.py | 18 +++-- 3 files changed, 110 insertions(+), 53 deletions(-) diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py index ec85ce0ad..42f3ffa4b 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_math.py +++ b/gnuradio-examples/python/volk_benchmark/volk_math.py @@ -48,38 +48,41 @@ def conjugate_cc(N): ###################################################################### -def multiply_conjugate_cc_volk(N): - op = gr.multiply_conjugate_cc() - tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) - return tb - -def multiply_conjugate_cc_nonvolk(N): - class s(gr.hier_block2): - def __init__(self): - gr.hier_block2.__init__(self, "s", - gr.io_signature(2, 2, gr.sizeof_gr_complex), - gr.io_signature(1, 1, gr.sizeof_gr_complex)) - conj = gr.conjugate_cc() - mult = gr.multiply_cc() - self.connect((self,0), (mult,0)) - self.connect((self,1), conj, (mult,1)) - self.connect(mult, self) - - op = s() - tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) - return tb +def multiply_conjugate_cc(N): + try: + op = gr.multiply_conjugate_cc() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) + return tb + + except AttributeError: + class s(gr.hier_block2): + def __init__(self): + gr.hier_block2.__init__(self, "s", + gr.io_signature(2, 2, gr.sizeof_gr_complex), + gr.io_signature(1, 1, gr.sizeof_gr_complex)) + conj = gr.conjugate_cc() + mult = gr.multiply_cc() + self.connect((self,0), (mult,0)) + self.connect((self,1), conj, (mult,1)) + self.connect(mult, self) + + op = s() + tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1) + return tb -#multiply_conjugate_cc = multiply_conjugate_cc_volk -multiply_conjugate_cc = multiply_conjugate_cc_nonvolk ###################################################################### def run_tests(func, N, iters): print("Running Test: {0}".format(func.__name__)) - tb = func(N) - t = timeit(tb, iters) - res = format_results(func.__name__, t) - return res + try: + tb = func(N) + t = timeit(tb, iters) + res = format_results(func.__name__, t) + return res + except AttributeError: + print "\tCould not run test. Skipping." + return None def main(): avail_tests = [multiply_const_cc, @@ -133,11 +136,13 @@ def main(): if not args.all: func = avail_tests[args.test] res = run_tests(func, N, iters) - replace_results(conn, label, N, iters, res) + if res is not None: + replace_results(conn, label, N, iters, res) else: for f in avail_tests: res = run_tests(f, N, iters) - replace_results(conn, label, N, iters, res) + if res is not None: + replace_results(conn, label, N, iters, res) if __name__ == "__main__": try: diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py index 665df5e14..d7578c5a7 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_plot.py +++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import sys +import sys, math import argparse from volk_test_funcs import * @@ -16,8 +16,15 @@ def main(): 'Run one of the volk tests first (e.g, volk_math.py)' parser = argparse.ArgumentParser(description=desc) parser.add_argument('-D', '--database', type=str, - default="volk_results.db", + default='volk_results.db', help='Database file to read data from [default: %(default)s]') + parser.add_argument('-E', '--errorbars', + action='store_true', default=False, + help='Show error bars (1 standard dev.)') + parser.add_argument('-P', '--plot', type=str, + choices=['mean', 'min', 'max'], + default='mean', + help='Set the type of plot to produce [default: %(default)s]') args = parser.parse_args() # Set up global plotting properties @@ -35,42 +42,81 @@ def main(): # width of bars depends on number of comparisons wdth = 0.80/M + # Colors to distinguish each table in the bar graph + # More than 5 tables will wrap around to the start. colors = ['b', 'r', 'g', 'm', 'k'] # Set up figure for plotting f0 = plt.figure(0, facecolor='w', figsize=(14,10)) s0 = f0.add_subplot(1,1,1) + # Create a register of names that exist in all tables + tmp_regs = [] + for table in tables: + # Get results from the next table + res = get_results(conn, table[0]) + + tmp_regs.append(list()) + for r in res: + try: + tmp_regs[-1].index(r['kernel']) + except ValueError: + tmp_regs[-1].append(r['kernel']) + + # Get only those names that are common in all tables + name_reg = tmp_regs[0] + for t in tmp_regs[1:]: + name_reg = list(set(name_reg) & set(t)) + name_reg.sort() + + # Pull the data out for each table into a dictionary + # we can ref the table by it's name and the data associated + # with a given kernel in name_reg by it's name. + # This ensures there is no sorting issue with the data in the + # dictionary, so the kernels are plotted against each other. + table_data = dict() for i,table in enumerate(tables): # Get results from the next table res = get_results(conn, table[0]) - - xlabels = [] - averages = [] - variances = [] - maxes = [] - mins = [] + + data = dict() for r in res: - xlabels.append(r['kernel']) - averages.append(r['avg']) - variances.append(r['var']) - maxes.append(r['max']) - mins.append(r['min']) + data[r['kernel']] = r + + table_data[table[0]] = data + # Plot the results + x0 = xrange(len(name_reg)) + for i,t in enumerate(table_data): # makes x values for this data set placement - x0 = xrange(len(res)) x1 = [x + i*wdth for x in x0] - s0.bar(x1, averages, width=wdth, - #yerr=variances, - color=colors[i%M], label=table[0], - edgecolor='k', linewidth=2) + ydata = [] + stds = [] + for name in name_reg: + stds.append(math.sqrt(table_data[t][name]['var'])) + if(args.plot == 'max'): + ydata.append(table_data[t][name]['max']) + elif(args.plot == 'min'): + ydata.append(table_data[t][name]['min']) + if(args.plot == 'mean'): + ydata.append(table_data[t][name]['avg']) + + if(args.errorbars is False): + stds = None + + s0.bar(x1, ydata, width=wdth, + yerr=stds, + color=colors[i%M], label=t, + edgecolor='k', linewidth=2, + error_kw={"ecolor": 'k', "capsize":5, + "linewidth":2}) s0.legend() s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']), fontsize=22, fontweight='bold') s0.set_xticks(x0) - s0.set_xticklabels(xlabels) + s0.set_xticklabels(name_reg) for label in s0.xaxis.get_ticklabels(): label.set_rotation(45) label.set_fontsize(16) diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py index 3dd10ae96..8041ccac1 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_types.py +++ b/gnuradio-examples/python/volk_benchmark/volk_types.py @@ -106,10 +106,14 @@ def complex_to_arg(N): def run_tests(func, N, iters): print("Running Test: {0}".format(func.__name__)) - tb = func(N) - t = timeit(tb, iters) - res = format_results(func.__name__, t) - return res + try: + tb = func(N) + t = timeit(tb, iters) + res = format_results(func.__name__, t) + return res + except AttributeError: + print "\tCould not run test. Skipping." + return None def main(): avail_tests = [float_to_char, @@ -170,11 +174,13 @@ def main(): if not args.all: func = avail_tests[args.test] res = run_tests(func, N, iters) - replace_results(conn, label, N, iters, res) + if res is not None: + replace_results(conn, label, N, iters, res) else: for f in avail_tests: res = run_tests(f, N, iters) - replace_results(conn, label, N, iters, res) + if res is not None: + replace_results(conn, label, N, iters, res) if __name__ == "__main__": try: -- cgit From 84cb8f63d0d96ede1a6a10940112ae5a087029fc Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 9 Feb 2012 23:23:36 -0500 Subject: volk: better args for benchmarking volk tests; can specify a list of test numbers. --- .../python/volk_benchmark/volk_math.py | 9 ++++---- .../python/volk_benchmark/volk_types.py | 24 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py index 42f3ffa4b..8b0081387 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_math.py +++ b/gnuradio-examples/python/volk_benchmark/volk_math.py @@ -100,8 +100,8 @@ def main(): implementation. The results are stored to an SQLite database \ that can then be read by volk_plot.py to plot the differences.' parser = argparse.ArgumentParser(description=desc) - parser.add_argument('label', type=str, - default=None, + parser.add_argument('-L', '--label', type=str, + required=True, default=None, help='Label of database table [default: %(default)s]') parser.add_argument('-D', '--database', type=str, default="volk_results.db", @@ -112,9 +112,10 @@ def main(): parser.add_argument('-I', '--iterations', type=int, default=20, help='Number of iterations [default: %(default)s]') - parser.add_argument('--test', type=int, + parser.add_argument('--tests', type=int, nargs='*', choices=xrange(len(avail_tests)), - help='Test to run') + help='A list of tests to run; can be a single test or a \ + space-separated list.') parser.add_argument('--list', action='store_true', help='List the available tests') parser.add_argument('--all', action='store_true', diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py index 8041ccac1..893318ddd 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_types.py +++ b/gnuradio-examples/python/volk_benchmark/volk_types.py @@ -138,8 +138,8 @@ def main(): implementation. The results are stored to an SQLite database \ that can then be read by volk_plot.py to plot the differences.' parser = argparse.ArgumentParser(description=desc) - parser.add_argument('label', type=str, - default=None, + parser.add_argument('-L', '--label', type=str, + required=True, default=None, help='Label of database table [default: %(default)s]') parser.add_argument('-D', '--database', type=str, default="volk_results.db", @@ -150,9 +150,10 @@ def main(): parser.add_argument('-I', '--iterations', type=int, default=20, help='Number of iterations [default: %(default)s]') - parser.add_argument('--test', type=int, + parser.add_argument('--tests', type=int, nargs='*', choices=xrange(len(avail_tests)), - help='Test to run') + help='A list of tests to run; can be a single test or a \ + space-separated list.') parser.add_argument('--list', action='store_true', help='List the available tests') parser.add_argument('--all', action='store_true', @@ -171,16 +172,15 @@ def main(): conn = create_connection(args.database) new_table(conn, label) - if not args.all: - func = avail_tests[args.test] - res = run_tests(func, N, iters) + if args.all: + tests = xrange(len(avail_tests)) + else: + tests = args.tests + + for test in tests: + res = run_tests(avail_tests[test], N, iters) if res is not None: replace_results(conn, label, N, iters, res) - else: - for f in avail_tests: - res = run_tests(f, N, iters) - if res is not None: - replace_results(conn, label, N, iters, res) if __name__ == "__main__": try: -- cgit From f671319ca9ccef8fb1590e676ff6bcb85d7ca5a1 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 11 Feb 2012 09:06:45 -0500 Subject: core: reverting float_to_int to non-Volk due to precision/wrapping issues. Using the Volk function causes too much of a change in the output values right now. Will have to relook at it for the right thing to do. Keeping the use of vlen and scale, though. --- gnuradio-core/src/lib/general/gr_float_to_int.cc | 18 ++++++++++++++---- gnuradio-core/src/lib/general/gri_float_to_int.cc | 4 ++-- gnuradio-core/src/lib/general/gri_float_to_int.h | 2 +- .../src/python/gnuradio/gr/qa_float_to_int.py | 14 ++++++-------- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc index 28214538f..b69591043 100644 --- a/gnuradio-core/src/lib/general/gr_float_to_int.cc +++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc @@ -26,6 +26,7 @@ #include #include +#include #include gr_float_to_int_sptr @@ -56,12 +57,17 @@ gr_float_to_int::set_scale(float scale) { d_scale = scale; } - int gr_float_to_int::work (int noutput_items, gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) { + // Disable the Volk for now. There is a problem for large 32-bit ints that + // are not properly represented by the precisions of a single float, which + // can cause wrapping from large, positive numbers to negative. + // In gri_float_to_int, the value is first promoted to a 64-bit + // value, clipped, then converted to a float. +#if 0 const float *in = (const float *) input_items[0]; int32_t *out = (int32_t *) output_items[0]; @@ -71,9 +77,13 @@ gr_float_to_int::work (int noutput_items, else { volk_32f_s32f_convert_32i_a(out, in, d_scale, d_vlen*noutput_items); } +#else + const float *in = (const float *) input_items[0]; + int *out = (int *) output_items[0]; + + gri_float_to_int (in, out, d_scale, d_vlen*noutput_items); + +#endif return noutput_items; } - - - diff --git a/gnuradio-core/src/lib/general/gri_float_to_int.cc b/gnuradio-core/src/lib/general/gri_float_to_int.cc index 5271e60e2..0b0b10dfe 100644 --- a/gnuradio-core/src/lib/general/gri_float_to_int.cc +++ b/gnuradio-core/src/lib/general/gri_float_to_int.cc @@ -34,10 +34,10 @@ static const int64_t MIN_INT = -2147483647; // -(2^31)-1 void -gri_float_to_int (const float *in, int *out, int nsamples) +gri_float_to_int (const float *in, int *out, float scale, int nsamples) { for (int i = 0; i < nsamples; i++){ - int64_t r = llrintf(in[i]); + int64_t r = llrintf(scale * in[i]); if (r < MIN_INT) r = MIN_INT; else if (r > MAX_INT) diff --git a/gnuradio-core/src/lib/general/gri_float_to_int.h b/gnuradio-core/src/lib/general/gri_float_to_int.h index a2f6ea877..d8b98efc1 100644 --- a/gnuradio-core/src/lib/general/gri_float_to_int.h +++ b/gnuradio-core/src/lib/general/gri_float_to_int.h @@ -28,6 +28,6 @@ /*! * convert array of floats to int with rounding and saturation. */ -GR_CORE_API void gri_float_to_int (const float *in, int *out, int nsamples); +GR_CORE_API void gri_float_to_int (const float *in, int *out, float scale, int nsamples); #endif /* INCLUDED_GRI_FLOAT_TO_INT_H */ diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py index 559f90f05..977a8518d 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py @@ -33,9 +33,7 @@ class test_float_to_int (gr_unittest.TestCase): def test_001(self): src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5) - - ### Volk results - expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5] + expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -6] src = gr.vector_source_f(src_data) op = gr.float_to_int() @@ -49,10 +47,10 @@ class test_float_to_int (gr_unittest.TestCase): def test_002(self): - src_data = ( 2146400000, 2147483647, - -2146400000, -2147483648 ) - expected_result = [ 2146400000, 2146400000, - -2146400000, -2147483648 ] + src_data = ( 2147483647, 2147483648, 2200000000, + -2147483648, -2147483649, -2200000000) + expected_result = [ 2147483647, 2147483647, 2147483647, + -2147483647, -2147483647, -2147483647] src = gr.vector_source_f(src_data) op = gr.float_to_int() dst = gr.vector_sink_i() @@ -69,7 +67,7 @@ class test_float_to_int (gr_unittest.TestCase): scale = 2 vlen = 3 src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3) - expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6,] + expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -7,] src = gr.vector_source_f(src_data) s2v = gr.stream_to_vector(gr.sizeof_float, vlen) op = gr.float_to_int(vlen, scale) -- cgit From 4589b6d6f062e92fd84965eaf47d3fc30bdf516e Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sat, 11 Feb 2012 12:27:45 -0500 Subject: volk: added some documentation to the Doxygen manual explaining Volk and how to use it. --- docs/doxygen/other/main_page.dox | 10 +++ docs/doxygen/other/volk_guide.dox | 161 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 docs/doxygen/other/volk_guide.dox diff --git a/docs/doxygen/other/main_page.dox b/docs/doxygen/other/main_page.dox index 0caa0b20f..68b098943 100644 --- a/docs/doxygen/other/main_page.dox +++ b/docs/doxygen/other/main_page.dox @@ -38,4 +38,14 @@ More details on packages in GNU Radio: \li \ref page_uhd \li \ref page_vocoder \li \ref page_pfb + +\section volk_main Using Volk in GNU Radio + +The \ref volk_guide page provides an overview of how to incorporate +and use Volk in GNU Radio blocks. + +Many blocks have already been converted to use Volk in their calls, so +they can also serve as examples. See the gr_complex_to_xxx.h file for +examples of various blocks that make use of Volk. + */ diff --git a/docs/doxygen/other/volk_guide.dox b/docs/doxygen/other/volk_guide.dox new file mode 100644 index 000000000..d898f3864 --- /dev/null +++ b/docs/doxygen/other/volk_guide.dox @@ -0,0 +1,161 @@ +/*! \page volk_guide Instructions for using Volk in GNU Radio + +\section volk_intro Introduction + +Volk is the Vector-Optimized Library of Kernels. It is a library that +contains kernels of hand-written SIMD code for different mathematical +operations. Since each SIMD architecture can be greatly different and +no compiler has yet come along to handle vectorization properly or +highly efficiently, Volk approaches the problem differently. For each +architecture or platform that a developer wishes to vectorize for, a +new proto-kernel is added to Volk. At runtime, Volk will select the +correct proto-kernel. In this way, the users of Volk call a kernel for +performing the operation that is platform/architecture agnostic. This +allows us to write portable SIMD code. + +Volk kernels are always defined with a 'generic' proto-kernel, which +is written in plain C. With the generic kernel, the kernel becomes +portable to any platform. Kernels are then extended by adding +proto-kernels for new platforms in which they are desired. + +A good example of a Volk kernel with multiple proto-kernels defined is +the volk_32f_s32f_multiply_32f_a. This kernel implements a scalar +multiplication of a vector of floating point numbers (each item in the +vector is multiplied by the same value). This kernel has the following +proto-kernels that are defined for 'generic,' 'avx,' 'sse,' and 'orc.' + +\code + void volk_32f_s32f_multiply_32f_a_generic + void volk_32f_s32f_multiply_32f_a_sse + void volk_32f_s32f_multiply_32f_a_avx + void volk_32f_s32f_multiply_32f_a_orc +\endcode + +These proto-kernels means that on platforms with AVX support, Volk can +select this option or the SSE option, depending on which is faster. On +other platforms, the ORC SIMD compiler might provide a solution. If +all else fails, Volk can fall back on the generic proto-kernel, which +will always work. + +Just a note on ORC. ORC is a SIMD compiler library that uses a generic +assembly-like language for SIMD commands. Based on the available SIMD +architecture of a system, it will try and compile a good +solution. Tests show that the results of ORC proto-kernels are +generally better than the generic versions but often not as good as +the hand-tuned proto-kernels for a specific SIMD architecture. This +is, of course, to be expected, and ORC provides a nice intermediary +step to performance improvements until a specific hand-tuned +proto-kernel can be made for a given platform. + +See Volk on +gnuradio.org for details on the Volk naming scheme. + + +\section volk_alignment Setting and Using Memory Alignment Information + +For Volk to work as best as possible, we want to use memory-aligned +SIMD calls, which means we have to have some way of knowing and +controlling the alignment of the buffers passed to gr_block's work +function. We set the alignment requirement for SIMD aligned memory +calls with: + +\code + const int alignment_multiple = + volk_get_alignment() / output_item_size; + set_alignment(alignment_multiple); +\endcode + +The Volk function 'volk_get_alignment' provides the alignment of the +the machine architecture. We then base the alignment on the number of +output items required to maintain the alignment, so we divide the +number of alignment bytes by the number of bytes in an output items +(sizeof(float), sizeof(gr_complex), etc.). This value is then set per +block with the 'set_alignment' function. + +Because the scheduler tries to optimize throughput, the number of +items available per call to work will change and depends on the +availability of the read and write buffers. This means that it +sometimes cannot produce a buffer that is properly memory +aligned. This is an inevitable consequence of the scheduler +system. Instead of requiring alignment, the scheduler enforces the +alignment as much as possible, and when a buffer becomes unaligned, +the scheduler will work to correct it as much as possible. If a +block's buffers are unaligned, then, the scheduler sets a flag to +indicate as much so that the block can then decide what best to +do. The next section discusses the use of the aligned/unaligned +information in a gr_block's work function. + + +\section volk_work Using Alignment Properties in Work() + +The buffers passed to work/general_work in a gr_block are not +guaranteed to be aligned, but they will mostly be aligned whenever +possible. When not aligned, the 'is_unaligned()' flag will be set. So +a block can know if its buffers are aligned and make the right +decisions. This looks like: + +\code +int +gr_some_block::work (int noutput_items, + gr_vector_const_void_star &input_items, + gr_vector_void_star &output_items) +{ + const float *in = (const float *) input_items[0]; + float *out = (float *) output_items[0]; + + if(is_unaligned()) { + // do something with unaligned data. This can either be a manual + // handling of the items or a call to an unaligned Volk function. + volk_32f_something_32f_u(out, in, noutput_items); + } + else { + // Buffers are aligned; can call the aligned Volk function. + volk_32f_something_32f_a(out, in, noutput_items); + } + + return noutput_items; +} +\endcode + + + +\section volk_tuning Tuning Volk Performance + +VOLK comes with a profiler that will build a config file for the best +SIMD architecture for your processor. Run volk_profile that is +installed into $PREFIX/bin. This program tests all known VOLK kernels +for each architecture supported by the processor. When finished, it +will write to $HOME/.volk/volk_config the best architecture for the +VOLK function. This file is read when using a function to know the +best version of the function to execute. + +\subsection volk_hand_tuning Hand-Tuning Performance + +If you know a particular architecture works best for your processor, +you can specify the particular architecture to use in the VOLK +preferences file: $HOME/.volk/volk_config + +The file looks like: + +\code + volk_ +\endcode + +Where the "FUNCTION_NAME" is the particular function that you want to +over-ride the default value and "ARCHITECTURE" is the VOLK SIMD +architecture to use (generic, sse, sse2, sse3, avx, etc.). For +example, the following config file tells VOLK to use SSE3 for the +aligned and unaligned versions of a function that multiplies two +complex streams together. + +\code + volk_32fc_x2_multiply_32fc_a sse3 + volk_32fc_x2_multiply_32fc_u sse3 +\endcode + +\b Tip: if benchmarking GNU Radio blocks, it can be useful to have a +volk_config file that sets all architectures to 'generic' as a way to +test the vectorized versus non-vectorized implementations. + +*/ -- cgit From ca8889bc5d83bf380832431ebb30c88ddef5a924 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 13 Feb 2012 14:47:42 -0500 Subject: volk: better handling of plot for error bars. Older versions of pylab don't like the kwargs. --- gnuradio-examples/python/volk_benchmark/volk_plot.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py index d7578c5a7..562d4f2f7 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_plot.py +++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py @@ -103,14 +103,16 @@ def main(): ydata.append(table_data[t][name]['avg']) if(args.errorbars is False): - stds = None - - s0.bar(x1, ydata, width=wdth, - yerr=stds, - color=colors[i%M], label=t, - edgecolor='k', linewidth=2, - error_kw={"ecolor": 'k', "capsize":5, - "linewidth":2}) + s0.bar(x1, ydata, width=wdth, + color=colors[i%M], label=t, + edgecolor='k', linewidth=2) + else: + s0.bar(x1, ydata, width=wdth, + yerr=stds, + color=colors[i%M], label=t, + edgecolor='k', linewidth=2, + error_kw={"ecolor": 'k', "capsize":5, + "linewidth":2}) s0.legend() s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']), -- cgit From 2597fe8ed82ee04c161c9f08534ae1b90d2b7d88 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 13 Feb 2012 14:50:14 -0500 Subject: core: change alignment requirement. --- gnuradio-core/src/lib/general/gr_char_to_short.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc index 13375412c..8b6cd0be1 100644 --- a/gnuradio-core/src/lib/general/gr_char_to_short.cc +++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc @@ -41,7 +41,7 @@ gr_char_to_short::gr_char_to_short (size_t vlen) d_vlen(vlen) { const int alignment_multiple = - volk_get_alignment() / sizeof(short); + volk_get_alignment() / sizeof(char); set_alignment(alignment_multiple); } -- cgit From e36d6f1f766e702d147ca494e21131cc66f157dd Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 13 Feb 2012 16:10:14 -0500 Subject: volk: can specify a table to calculate the percent improvement against instead of just the raw numbers. --- .../python/volk_benchmark/volk_plot.py | 80 ++++++++++++++++------ 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py index 562d4f2f7..823dfbf64 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_plot.py +++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py @@ -25,11 +25,15 @@ def main(): choices=['mean', 'min', 'max'], default='mean', help='Set the type of plot to produce [default: %(default)s]') + parser.add_argument('-%', '--percent', type=str, + default=None, metavar="table", + help='Show percent difference to the given type [default: %(default)s]') args = parser.parse_args() # Set up global plotting properties matplotlib.rcParams['figure.subplot.bottom'] = 0.2 matplotlib.rcParams['figure.subplot.top'] = 0.95 + matplotlib.rcParams['figure.subplot.right'] = 0.98 matplotlib.rcParams['ytick.labelsize'] = 16 matplotlib.rcParams['xtick.labelsize'] = 16 matplotlib.rcParams['legend.fontsize'] = 18 @@ -39,9 +43,6 @@ def main(): tables = list_tables(conn) M = len(tables) - # width of bars depends on number of comparisons - wdth = 0.80/M - # Colors to distinguish each table in the bar graph # More than 5 tables will wrap around to the start. colors = ['b', 'r', 'g', 'm', 'k'] @@ -85,12 +86,23 @@ def main(): table_data[table[0]] = data + if args.percent is not None: + for i,t in enumerate(table_data): + if args.percent == t: + norm_data = [] + for name in name_reg: + if(args.plot == 'max'): + norm_data.append(table_data[t][name]['max']) + elif(args.plot == 'min'): + norm_data.append(table_data[t][name]['min']) + elif(args.plot == 'mean'): + norm_data.append(table_data[t][name]['avg']) + + # Plot the results x0 = xrange(len(name_reg)) - for i,t in enumerate(table_data): - # makes x values for this data set placement - x1 = [x + i*wdth for x in x0] - + i = 0 + for t in (table_data): ydata = [] stds = [] for name in name_reg: @@ -99,24 +111,52 @@ def main(): ydata.append(table_data[t][name]['max']) elif(args.plot == 'min'): ydata.append(table_data[t][name]['min']) - if(args.plot == 'mean'): + elif(args.plot == 'mean'): ydata.append(table_data[t][name]['avg']) - if(args.errorbars is False): - s0.bar(x1, ydata, width=wdth, - color=colors[i%M], label=t, - edgecolor='k', linewidth=2) + if args.percent is not None: + ydata = [-100*(y-n)/y for y,n in zip(ydata,norm_data)] + if(args.percent != t): + # makes x values for this data set placement + # width of bars depends on number of comparisons + wdth = 0.80/(M-1) + x1 = [x + i*wdth for x in x0] + i += 1 + + s0.bar(x1, ydata, width=wdth, + color=colors[(i-1)%M], label=t, + edgecolor='k', linewidth=2) + else: - s0.bar(x1, ydata, width=wdth, - yerr=stds, - color=colors[i%M], label=t, - edgecolor='k', linewidth=2, - error_kw={"ecolor": 'k', "capsize":5, - "linewidth":2}) + # makes x values for this data set placement + # width of bars depends on number of comparisons + wdth = 0.80/M + x1 = [x + i*wdth for x in x0] + i += 1 + + if(args.errorbars is False): + s0.bar(x1, ydata, width=wdth, + color=colors[(i-1)%M], label=t, + edgecolor='k', linewidth=2) + else: + s0.bar(x1, ydata, width=wdth, + yerr=stds, + color=colors[i%M], label=t, + edgecolor='k', linewidth=2, + error_kw={"ecolor": 'k', "capsize":5, + "linewidth":2}) + + nitems = res[0]['nitems'] + if args.percent is None: + s0.set_ylabel("Processing time (sec) [{0:G} items]".format(nitems), + fontsize=22, fontweight='bold', + horizontalalignment='center') + else: + s0.set_ylabel("% Improvement over {0} [{1:G} items]".format( + args.percent, nitems), + fontsize=22, fontweight='bold') s0.legend() - s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']), - fontsize=22, fontweight='bold') s0.set_xticks(x0) s0.set_xticklabels(name_reg) for label in s0.xaxis.get_ticklabels(): -- cgit From ef1748e4efc40cc065fb5f1b40d710256dd37efa Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Mon, 13 Feb 2012 20:53:51 -0500 Subject: volk: complex_to_arg doesn't actually use Volk. No need to benchmark it. --- gnuradio-examples/python/volk_benchmark/volk_types.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py index 893318ddd..3bc5a22ae 100755 --- a/gnuradio-examples/python/volk_benchmark/volk_types.py +++ b/gnuradio-examples/python/volk_benchmark/volk_types.py @@ -97,12 +97,6 @@ def complex_to_mag_squared(N): ###################################################################### -def complex_to_arg(N): - op = gr.complex_to_arg() - tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1) - return tb - -###################################################################### def run_tests(func, N, iters): print("Running Test: {0}".format(func.__name__)) @@ -128,8 +122,7 @@ def main(): complex_to_real, complex_to_imag, complex_to_mag, - complex_to_mag_squared, - complex_to_arg] + complex_to_mag_squared] desc='Time an operation to compare with other implementations. \ This program runs a simple GNU Radio flowgraph to test a \ -- cgit From ba3f1a4e8d5879c91eb5c47cc7e7c3ac73b1989d Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 14 Feb 2012 17:20:11 -0500 Subject: volk: added README file to explain how to run the benchmark tests and plotting tool. --- gnuradio-examples/python/volk_benchmark/README | 252 +++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 gnuradio-examples/python/volk_benchmark/README diff --git a/gnuradio-examples/python/volk_benchmark/README b/gnuradio-examples/python/volk_benchmark/README new file mode 100644 index 000000000..516fc15bd --- /dev/null +++ b/gnuradio-examples/python/volk_benchmark/README @@ -0,0 +1,252 @@ +VOLK Benchmarking Scripts + +The Python programs in this directory are designed to help benchmark +and compare Volk enhancements to GNU Radio. There are two kinds of +scripts here: collecting data and displaying the data. + +Data collection is done by running a Volk testing script that will +populate a SQLite database file (volk_results.db by default). The +plotting utility provided here reads from the database files and plots +bar graphs to compare the different installations. + +These benchmarks can be used to compare previous versions of GNU +Radio to using Volk; they can be used to compare different Volk +proto-kernels, as well, by editing the volk_config file; or they could +be used to compare performance between different machines and/or +processors. + + +====================================================================== +Volk Profiling + +Before doing any kind of Volk benchmarking, it is important to run the +volk_profile program. The profiler will build a config file for the +best SIMD architecture for your processor. Run volk_profile that is +installed into $PREFIX/bin. This program tests all known Volk kernels +for each proto-kernel supported by the processor. When finished, it +will write to $HOME/.volk/volk_config the best architecture for the +VOLK function. This file is read when using a function to know the +best version of the function to execute. + +The volk_config file contains a line for each kernel, where each line +looks like: + + volk_ + +The architecture will be something like (sse, sse2, sse3, avx, neon, +etc.), depending on your processor. + + +====================================================================== +Benchmark Tests + +There are currently two benchmark scripts defined for collecting +data. There is one that runs through the type conversions that have +been converted to Volk (volk_types.py) and the other runs through the +math operators converted to using Volk (volk_math.py). + +Script prototypes +Both have the same structure for use: + +---------------------------------------------------------------------- +./volk_.py [-h] -L LABEL [-D DATABASE] [-N NITEMS] [-I ITERATIONS] + [--tests [{0,1,2,3} [{0,1,2,3} ...]]] [--list] + [--all] + +optional arguments: + -h, --help show this help message and exit + -L LABEL, --label LABEL + Label of database table [default: None] + -D DATABASE, --database DATABASE + Database file to store data in [default: + volk_results.db] + -N NITEMS, --nitems NITEMS + Number of items per iterations [default: 1000000000.0] + -I ITERATIONS, --iterations ITERATIONS + Number of iterations [default: 20] + --tests [{0,1,2,3} [{0,1,2,3} ...]] + A list of tests to run; can be a single test or a + space-separated list. + --list List the available tests + --all Run all tests +---------------------------------------------------------------------- + +To run, you specify the tests to run and a label to store along with +the results. To find out what the available tests are, use the +'--list' option. + +To specify a subset of tests, use the '--tests' with space-separated +list of tests numbers (e.g., --tests 0 2 4 9). + +Use the '--all' to run all tests. + +The label specified is used as an identifier for the benchmarking +currently being done. This is required as it is important in +organizing the data in the database (each label is its own +table). Usually, the label will specify the type of run being done, +such as "volk_aligned" or "v3_5_1". In these cases, the "volk_aligned" +label says that this is for a benchmarking using the GNU Radio version +that uses the aligned scheduler and Volk calls in the work +functions. The "v3_5_1" label is if you were benchmarking an installed +version 3.5.1 of GNU Radio, which is pre-Volk. These will then be +plotted against each other to see the timing differences. + +The 'database' option will output the results to a new database +file. This can be useful for separating the output of different runs +or of different benchmarks, such as the types versus the math scripts, +say, or to distinguish results from different computers. + +If rerun using the same database and label, the entries in the table +will simply be replaced by the new results. + +It is often useful to use the 'sqlitebrowser' program to interrogate +the database file farther, if you are interested in the structure or +the raw data. + +Other parameters of this script set the number of items to process and +number of iterations to use when computing the benchmarking +data. These default to 1 billion samples per iteration over 20 +iterations. Expect a default run to take a long time. Using the '-N' +and '-I' options can be used to change the runtime of the benchmarks +but are set high to remove problems of variance between iterations. + +====================================================================== +Plotting Results + +The volk_plot.py script reads a given database file and plots the +results. The default behavior is to read all of the labels stored in +the database and plot them as data sets on a bar graph. This shows the +average time taken to process the number of items given. + +The options for the plotting script are: + +usage: volk_plot.py [-h] [-D DATABASE] [-E] [-P {mean,min,max}] [-% table] + +Plot Volk performance results from a SQLite database. Run one of the volk +tests first (e.g, volk_math.py) + +---------------------------------------------------------------------- +optional arguments: + -h, --help show this help message and exit + -D DATABASE, --database DATABASE + Database file to read data from [default: + volk_results.db] + -E, --errorbars Show error bars (1 standard dev.) + -P {mean,min,max}, --plot {mean,min,max} + Set the type of plot to produce [default: mean] + -% table, --percent table + Show percent difference to the given type [default: + None] +---------------------------------------------------------------------- + +This script allows you to specify the database used (-D), but will +always read all rows from all tables from it and display them. You can +also turn on plotting error bars (1 standard deviation the mean). Be +careful, though, as some older versions of Matplotlib might have an +issue with this option. + +The mean time is only one possible statistic that we might be +interested in when looking at the data. It represents the average user +experience when running a given block. On the other hand, the minimum +runtime best represents the actual performance of a block given +minimal OS interruptions while running. Right now, the data collected +includes the mean, variance, min, and max over the number of +iterations given. Using the '-P' option, you can specify the type of +data to plot (mean, min, or max). + +Another useful way of looking at the data is to compare the percent +improvement of a benchmark compared to another. This is done using the +'-%' option with the provided table (or label) as the baseline. So if +we were interested in comparing how much the 'volk_aligned' was over +'v3_5_1', we would specify '-% v3_5_1' to see this. The plot would +then only show the percent speedup observed using Volk for each of the +blocks. + + +====================================================================== +Benchmarking Walkthrough + +This will walk through an example of benchmarking the new Volk +implementation versus the pre-Volk GNU Radio. It also shows how to +look at the SIMD optimized versions versus the generic +implementations. + +Since we introduced Volk in GNU Radio 3.5.2, we will use the following +labels for our data: + + 1.) volk_aligned: v3.5.2 with volk_profile results in .volk/volk_config + 2.) v3_5_2: v3.5.2 with the generic (non-SIMD) calls to Volk + 3.) v3_5_1: an installation of GNU Radio from version v3.5.1 + +We assume that we have installed two versions of GNU Radio. + + v3.5.2 installed into /opt/gr-3_5_2 + v3.5.1 installed into /opt/gr-3_5_1 + +To test cases 1 and 2 above, we have to run GNU Radio from the v3.5.2 +installation, so we set the following environmental variables. Note +that this is written for Ubuntu 11.10. These commands and directories +may have to be changed depending on your OS and versions. + + export LD_LIBRARY_PATH=/opt/gr-3_5_2/lib + export LD_LIBRARY_PATH=/opt/gr-3_5_2/lib/python2.7/dist-packages + +Now we can run the benchmark tests, so we will focus on the math +operators: + + ./volk_math.py -D volk_results_math.db --all -L volk_aligned + +When this finishes, the 'volk_results_math.db' will contain our +results for this run. + +We next want to run the generic, non-SIMD, calls. This can be done by +changing the Volk kernel settings in $HOME/.volk/volk_config. First, +make a backup of this file. Then edit it and change all architecture +calls (sse, sse2, etc.) to 'generic.' Now, Volk will only call the +generic versions of these functions. So we rerun the benchmark with: + + ./volk_math.py -D volk_results_math.db --all -L v3_5_2 + +Notice that the only thing changed here was the label to 'v3_5_2'. + +Next, we want to collect data for the non-Volk version of GNU +Radio. This is important because some internals to GNU Radio were made +when adding support for Volk, so it is nice to know what the +differences do to our performance. First, we set the environmental +variables to point to the v3.5.1 installation: + + export LD_LIBRARY_PATH=/opt/gr-3_5_1/lib + export LD_LIBRARY_PATH=/opt/gr-3_5_1/lib/python2.7/dist-packages + +And when we run the test, we use the same command line, but the GNU +Radio libraries and Python files used come from v3.5.1. We also change +the label to indicate the different version to store. + + ./volk_math.py -D volk_results_math.db --all -L v3_5_1 + +We now have a database populated with three tables for the three +different labels. We can plot them all together by simply running: + + ./volk_plot.py -D volk_results_math.db + +This will show the average run times for each of the three +configurations for all math functions tested. We might also be +interested to see the difference in performance from the v3.5.1 +version, so we can run: + + ./volk_plot.py -D volk_results_math.db -% v3_5_1 + +That will plot both the 'volk_aligned' and 'v3_5_2' as a percentage +improvement over v3_5_1. A positive value indicates that this version +runs faster than the v3.5.1 version. + + +---------------------------------------------------------------------- + +Another interesting test case could be to compare results on different +processors. So if you have different generation Intels, AMD, or +whatever, you can simply pass the .db file around and run the Volk +benchmark script to populate the database with different results. For +this, you would specify a label like '-L i7_2620M' that indicates the +processor type to uniquely ID the data. + -- cgit From 2eaa0a6e1e57cfc374c258c317ecb469fc49bf53 Mon Sep 17 00:00:00 2001 From: Johnathan Corgan Date: Tue, 14 Feb 2012 15:37:57 -0800 Subject: build: fix autotools for gnuradio-core volkification --- config/grc_volk.m4 | 7 +++---- gnuradio-core/src/lib/Makefile.am | 1 + gnuradio-core/src/lib/general/Makefile.am | 8 ++++---- gnuradio-core/src/lib/gengen/Makefile.gen | 9 --------- gnuradio-core/src/lib/gengen/generate_common.py | 2 +- grc/blocks/Makefile.am | 2 -- volk/include/volk/Makefile.am | 4 ++-- 7 files changed, 11 insertions(+), 22 deletions(-) diff --git a/config/grc_volk.m4 b/config/grc_volk.m4 index f349d5e99..cc6d75649 100644 --- a/config/grc_volk.m4 +++ b/config/grc_volk.m4 @@ -26,11 +26,10 @@ AC_DEFUN([GRC_VOLK],[ dnl Test if $enable_volk is: dnl yes : if the --enable code passed muster and all dependencies are met dnl no : otherwise, then do not set variables - if test $passed != with && test x$enable_volk == xyes; then + if test $passed != with && test x$enable_volk != xno; then dnl how and where to find INCLUDES and LA - volk_INCLUDES="-I\${abs_top_srcdir}/volk/include" - volk_LA="\${abs_top_builddir}/volk/lib/libvolk.la \ - \${abs_top_builddir}/volk/lib/libvolk_runtime.la" + volk_INCLUDES="-I\${abs_top_srcdir}/volk/gen/include -I\${abs_top_srcdir}/volk/include" + volk_LA="\${abs_top_builddir}/volk/lib/libvolk.la" fi dnl volk uses a subsidiary configure.ac diff --git a/gnuradio-core/src/lib/Makefile.am b/gnuradio-core/src/lib/Makefile.am index fc1b7917b..21e721073 100644 --- a/gnuradio-core/src/lib/Makefile.am +++ b/gnuradio-core/src/lib/Makefile.am @@ -51,6 +51,7 @@ libgnuradio_core_la_LIBADD = \ runtime/libruntime.la \ hier/libhier.la \ $(GRUEL_LA) \ + $(VOLK_LA) \ $(FFTW3F_LIBS) \ $(GSL_LIBS) \ $(CBLAS_LIBS) \ diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am index ea3b31fd3..5b4a702e1 100644 --- a/gnuradio-core/src/lib/general/Makefile.am +++ b/gnuradio-core/src/lib/general/Makefile.am @@ -129,8 +129,8 @@ libgeneral_la_SOURCES = \ gr_reverse.cc \ gr_rms_cf.cc \ gr_rms_ff.cc \ + gr_short_to_char.cc \ gr_short_to_float.cc \ - gr_short_to_int.cc \ gr_int_to_float.cc \ gr_simple_correlator.cc \ gr_simple_framer.cc \ @@ -194,8 +194,8 @@ libgeneral_qa_la_SOURCES = \ grinclude_HEADERS = \ gr_core_api.h \ complex_vec_test.h \ - gr_add.h \ gr_additive_scrambler_bb.h \ + gr_add_ff.h \ gr_agc_cc.h \ gr_agc_ff.h \ gr_agc2_cc.h \ @@ -339,7 +339,7 @@ grinclude_HEADERS = \ gri_int_to_float.h \ gri_lfsr_15_1_0.h \ gri_lfsr_32k.h \ - gri_short_to_char.h \ + gri_short_to_float.h \ gri_uchar_to_float.h \ malloc16.h \ random.h \ @@ -368,8 +368,8 @@ noinst_HEADERS = \ swiginclude_HEADERS = \ complex_vec_test.i \ general.i \ - gr_add.i \ gr_additive_scrambler_bb.i \ + gr_add_ff.i \ gr_agc_cc.i \ gr_agc_ff.i \ gr_agc2_cc.i \ diff --git a/gnuradio-core/src/lib/gengen/Makefile.gen b/gnuradio-core/src/lib/gengen/Makefile.gen index fb7b21e24..db260585f 100644 --- a/gnuradio-core/src/lib/gengen/Makefile.gen +++ b/gnuradio-core/src/lib/gengen/Makefile.gen @@ -12,7 +12,6 @@ GENERATED_H = \ gr_add_const_vff.h \ gr_add_const_vii.h \ gr_add_const_vss.h \ - gr_add_ff.h \ gr_add_ii.h \ gr_add_ss.h \ gr_and_bb.h \ @@ -45,14 +44,12 @@ GENERATED_H = \ gr_moving_average_ff.h \ gr_moving_average_ii.h \ gr_moving_average_ss.h \ - gr_multiply_const_ff.h \ gr_multiply_const_ii.h \ gr_multiply_const_ss.h \ gr_multiply_const_vcc.h \ gr_multiply_const_vff.h \ gr_multiply_const_vii.h \ gr_multiply_const_vss.h \ - gr_multiply_ff.h \ gr_multiply_ii.h \ gr_multiply_ss.h \ gr_mute_cc.h \ @@ -115,7 +112,6 @@ GENERATED_I = \ gr_add_const_vff.i \ gr_add_const_vii.i \ gr_add_const_vss.i \ - gr_add_ff.i \ gr_add_ii.i \ gr_add_ss.i \ gr_and_bb.i \ @@ -148,14 +144,12 @@ GENERATED_I = \ gr_moving_average_ff.i \ gr_moving_average_ii.i \ gr_moving_average_ss.i \ - gr_multiply_const_ff.i \ gr_multiply_const_ii.i \ gr_multiply_const_ss.i \ gr_multiply_const_vcc.i \ gr_multiply_const_vff.i \ gr_multiply_const_vii.i \ gr_multiply_const_vss.i \ - gr_multiply_ff.i \ gr_multiply_ii.i \ gr_multiply_ss.i \ gr_mute_cc.i \ @@ -218,7 +212,6 @@ GENERATED_CC = \ gr_add_const_vff.cc \ gr_add_const_vii.cc \ gr_add_const_vss.cc \ - gr_add_ff.cc \ gr_add_ii.cc \ gr_add_ss.cc \ gr_and_bb.cc \ @@ -251,14 +244,12 @@ GENERATED_CC = \ gr_moving_average_ff.cc \ gr_moving_average_ii.cc \ gr_moving_average_ss.cc \ - gr_multiply_const_ff.cc \ gr_multiply_const_ii.cc \ gr_multiply_const_ss.cc \ gr_multiply_const_vcc.cc \ gr_multiply_const_vff.cc \ gr_multiply_const_vii.cc \ gr_multiply_const_vss.cc \ - gr_multiply_ff.cc \ gr_multiply_ii.cc \ gr_multiply_ss.cc \ gr_mute_cc.cc \ diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py index 616cc4b06..6da2044e0 100755 --- a/gnuradio-core/src/lib/gengen/generate_common.py +++ b/gnuradio-core/src/lib/gengen/generate_common.py @@ -66,7 +66,7 @@ others = ( ('gr_peak_detector_XX', ('fb','ib','sb')), ('gr_multiply_XX', ('ss','ii')), ('gr_multiply_const_XX', ('ss','ii')), - ('gr_add_XX', ('ss','cc')) + ('gr_add_XX', ('ss','cc','ii')) ) diff --git a/grc/blocks/Makefile.am b/grc/blocks/Makefile.am index 738e79f24..104ba8062 100644 --- a/grc/blocks/Makefile.am +++ b/grc/blocks/Makefile.am @@ -28,8 +28,6 @@ dist_ourdata_DATA = \ band_reject_filter.xml \ blks2_am_demod_cf.xml \ blks2_analysis_filterbank.xml \ - blks2_cvsd_encode.xml \ - blks2_cvsd_decode.xml \ blks2_error_rate.xml \ blks2_fm_deemph.xml \ blks2_fm_demod_cf.xml \ diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index f6b5835b1..a01ddf193 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -59,8 +59,8 @@ volkinclude_HEADERS = \ volk_32fc_32f_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_a.h \ volk_32fc_s32fc_multiply_32fc_u.h \ - volk_32fc_s32fc_multiply_conjugate_32fc_a.h \ - volk_32fc_s32fc_multiply_conjugate_32fc_u.h \ + volk_32fc_x2_multiply_conjugate_32fc_a.h \ + volk_32fc_x2_multiply_conjugate_32fc_u.h \ volk_32fc_s32f_power_32fc_a.h \ volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \ volk_32fc_s32f_atan2_32f_a.h \ -- cgit From fa8ab7cb146287a9f0d8db67e3126ab4a867a201 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 21 Feb 2012 15:41:27 -0800 Subject: Volk: add scalar const support to the profiler/QA code. Disabled volk_32fc_s32fc_multiply_32fc_a's Orc impl due to it not working. --- volk/apps/volk_profile.cc | 4 +-- .../include/volk/volk_32fc_s32fc_multiply_32fc_a.h | 6 ++-- volk/lib/qa_utils.cc | 32 +++++++++++++++++++--- volk/lib/qa_utils.h | 5 +++- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 10a699872..5ad7727aa 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -102,8 +102,8 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results); VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); - VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, lv_32fc_t(1.0, 0.5), 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 1.0, 204600, 10000, &results); char path[256]; get_config_path(path); diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h index b27a7259f..75cf8c8b2 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h @@ -34,9 +34,9 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, c \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points); -static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ - volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points); -} +//static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ +// volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points); +//} #endif /* LV_HAVE_ORC */ diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 9bb515e9f..bb37801c9 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -198,6 +198,18 @@ inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector &buf while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } +inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + template bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; @@ -246,7 +258,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, - float scalar, + lv_32fc_t scalar, int vlen, int iter, std::vector *best_arch_vector = 0 @@ -316,21 +328,33 @@ bool run_volk_tests(struct volk_func_desc desc, if(inputsc.size() == 0) { run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + if(inputsc[0].is_complex) { + run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } } else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + if(inputsc[0].is_complex) { + run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } } else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + if(inputsc[0].is_complex) { + run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } } else throw "unsupported 3 arg function >1 scalars"; break; case 4: diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index a1bc1f20c..b998df852 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -21,7 +21,7 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector *); +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector *); #define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } #define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) @@ -32,5 +32,8 @@ typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*); typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*); +typedef void (*volk_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const char*); //one input vector, one scalar float input +typedef void (*volk_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*); +typedef void (*volk_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*); #endif //VOLK_QA_UTILS_H -- cgit From 330cddf31208b843c0997c6fb05cb3facf31f536 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 22 Feb 2012 08:46:55 -0800 Subject: Remove ORC invocation since // doesn't dissuade the generator. --- volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h index 75cf8c8b2..665fad47a 100644 --- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h +++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h @@ -25,20 +25,6 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, c } #endif /* LV_HAVE_GENERIC */ -#ifdef LV_HAVE_ORC - /*! - \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector - */ -extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points); -//static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ -// volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points); -//} -#endif /* LV_HAVE_ORC */ - -- cgit From e8d644872837f4cbfc05851710531b2ac5259806 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 23 Feb 2012 14:28:21 -0500 Subject: volk: float to short conversion is consistent between archs and tail cases. Rounds to nearest number. --- gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py | 7 ++----- volk/include/volk/volk_32f_s32f_convert_16i_a.h | 15 ++++++++------- volk/include/volk/volk_32f_s32f_convert_16i_u.h | 15 ++++++++------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py index 926f1c08b..0d89a149c 100755 --- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py +++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py @@ -34,10 +34,7 @@ class test_float_to_short (gr_unittest.TestCase): def test_001(self): src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5) - expected_result = [int(round(s)) for s in src_data] - - ### Volk results - expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5] + expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -6] src = gr.vector_source_f(src_data) op = gr.float_to_short() @@ -71,7 +68,7 @@ class test_float_to_short (gr_unittest.TestCase): scale = 2 vlen = 3 src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3) - expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6] + expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -7] src = gr.vector_source_f(src_data) s2v = gr.stream_to_vector(gr.sizeof_float, vlen) op = gr.float_to_short(vlen, scale) diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h index 10c921b08..a24959678 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_a.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h @@ -4,6 +4,7 @@ #include #include #include +#include #ifdef LV_HAVE_SSE2 #include @@ -57,7 +58,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const r = max_val; else if(r < min_val) r = min_val; - outputVector[number] = (int16_t)(r); + outputVector[number] = (int16_t)rintf(r); } } #endif /* LV_HAVE_SSE2 */ @@ -98,10 +99,10 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]); } number = quarterPoints * 4; @@ -111,7 +112,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const r = max_val; else if(r < min_val) r = min_val; - outputVector[number] = (int16_t)(r); + outputVector[number] = (int16_t)rintf(r); } } #endif /* LV_HAVE_SSE */ @@ -138,7 +139,7 @@ static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, co r = min_val; else if(r > max_val) r = max_val; - *outputVectorPtr++ = (int16_t)(r); + *outputVectorPtr++ = (int16_t)rintf(r); } } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index f339a7d10..f58158041 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -3,6 +3,7 @@ #include #include +#include #ifdef LV_HAVE_SSE2 #include @@ -57,7 +58,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const r = max_val; else if(r < min_val) r = min_val; - outputVector[number] = (int16_t)(r); + outputVector[number] = (int16_t)rintf(r); } } #endif /* LV_HAVE_SSE2 */ @@ -99,10 +100,10 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); _mm_store_ps(outputFloatBuffer, ret); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]); - *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]); + *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]); } number = quarterPoints * 4; @@ -112,7 +113,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const r = max_val; else if(r < min_val) r = min_val; - outputVector[number] = (int16_t)(r); + outputVector[number] = (int16_t)rintf(r); } } #endif /* LV_HAVE_SSE */ @@ -140,7 +141,7 @@ static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, co r = max_val; else if(r < min_val) r = min_val; - *outputVectorPtr++ = (int16_t)(r); + *outputVectorPtr++ = (int16_t)rintf(r); } } #endif /* LV_HAVE_GENERIC */ -- cgit