From 6ac7576d32b8be601843c871327856ebd2b965d6 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 22 Jan 2012 17:44:04 -0500
Subject: sched: first attempt at working with aligned data sets. A block has
 an indicator on whether or not the buffers are aligned; they can use this to
 determine which Volk function to use or if to use Volk at all.

---
 gnuradio-core/src/lib/runtime/gr_block.cc          | 30 ++++++++++++++++
 gnuradio-core/src/lib/runtime/gr_block.h           | 28 +++++++++++++++
 gnuradio-core/src/lib/runtime/gr_block_executor.cc | 41 ++++++++++++++++++++--
 3 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/gnuradio-core/src/lib/runtime/gr_block.cc b/gnuradio-core/src/lib/runtime/gr_block.cc
index 9463869f5..78f77486b 100644
--- a/gnuradio-core/src/lib/runtime/gr_block.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block.cc
@@ -34,6 +34,9 @@ gr_block::gr_block (const std::string &name,
 		    gr_io_signature_sptr output_signature)
   : gr_basic_block(name, input_signature, output_signature),
     d_output_multiple (1),
+    d_output_multiple_set(false),
+    d_unaligned(0),
+    d_is_unaligned(false),
     d_relative_rate (1.0),
     d_history(1),
     d_fixed_rate(false),
@@ -75,9 +78,36 @@ gr_block::set_output_multiple (int multiple)
   if (multiple < 1)
     throw std::invalid_argument ("gr_block::set_output_multiple");
 
+  d_output_multiple_set = true;
   d_output_multiple = multiple;
 }
 
+void
+gr_block::set_alignment (int multiple)
+{
+  if (multiple < 1)  // an alignment of less than one item is rejected
+    throw std::invalid_argument ("gr_block::set_alignment");
+  // Stored in d_output_multiple (also written by set_output_multiple); d_output_multiple_set is left untouched.
+  d_output_multiple = multiple;
+}
+
+void
+gr_block::set_unaligned (int na)
+{
+  // The unaligned count must not be negative, and it doesn't make
+  // sense for it to be larger than the alignment value.
+  if ((na < 0) || (na > d_output_multiple))
+    throw std::invalid_argument ("gr_block::set_unaligned");
+
+  d_unaligned = na;
+}
+
+void 
+gr_block::set_is_unaligned (bool u)
+{
+  d_is_unaligned = u;  // no validation; this is the flag is_unaligned() reports
+}
+
 void
 gr_block::set_relative_rate (double relative_rate)
 {
diff --git a/gnuradio-core/src/lib/runtime/gr_block.h b/gnuradio-core/src/lib/runtime/gr_block.h
index 86e0583e9..9171942e0 100644
--- a/gnuradio-core/src/lib/runtime/gr_block.h
+++ b/gnuradio-core/src/lib/runtime/gr_block.h
@@ -152,7 +152,32 @@ class GR_CORE_API gr_block : public gr_basic_block {
    */
   void set_output_multiple (int multiple);
   int  output_multiple () const { return d_output_multiple; }
+  bool  output_multiple_set () const { return d_output_multiple_set; }
 
+  /*!
+   * \brief Constrains buffers to work on a set item alignment (for SIMD)
+   *
+   * set_alignment causes the scheduler to ensure that the noutput_items
+   * argument passed to forecast and general_work will be an integer multiple
+   * of \p multiple. The default value is 1.
+   *
+   * This control is similar to the output_multiple setting, except
+   * that if the number of items passed to the block is less than the
+   * output_multiple, this value is ignored and the block can produce
+   * like normal. The d_unaligned value is set to the number of items
+   * the block is off by. In the next call to general_work, the
+   * noutput_items is set to d_unaligned or less until
+   * d_unaligned==0. The buffers are now aligned again and the aligned
+   * calls can be performed again.
+   */
+  void set_alignment (int multiple);
+  int  alignment () const { return d_output_multiple; }
+
+  void set_unaligned (int na);
+  int unaligned () const { return d_unaligned; }
+  void set_is_unaligned (bool u);
+  bool is_unaligned () const { return d_is_unaligned; }
+  
   /*!
    * \brief Tell the scheduler \p how_many_items of input stream \p which_input were consumed.
    */
@@ -231,6 +256,9 @@ class GR_CORE_API gr_block : public gr_basic_block {
  private:
 
   int                   d_output_multiple;
+  bool                  d_output_multiple_set;
+  int                   d_unaligned;
+  bool                  d_is_unaligned;
   double                d_relative_rate;	// approx output_rate / input_rate
   gr_block_detail_sptr	d_detail;		// implementation details
   unsigned              d_history;
diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index ef53baf78..1c52c0e13 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -36,7 +36,7 @@
 #include <stdio.h>
 
 // must be defined to either 0 or 1
-#define ENABLE_LOGGING 0
+#define ENABLE_LOGGING 1
 
 #if (ENABLE_LOGGING)
 #define LOG(x) do { x; } while(0)
@@ -183,6 +183,7 @@ gr_block_executor::run_one_iteration()
   int			noutput_items;
   int			max_items_avail;
   int                   max_noutput_items = d_max_noutput_items;
+  int                   new_alignment;
 
   gr_block		*m = d_block.get();
   gr_block_detail	*d = m->detail().get();
@@ -284,7 +285,7 @@ gr_block_executor::run_one_iteration()
     }
 
     // determine the minimum available output space
-    noutput_items = min_available_space (d, m->output_multiple ());
+    noutput_items = min_available_space (d, m->output_multiple ()) - m->output_multiple();
     if (ENABLE_LOGGING){
       *d_log << " regular ";
       if (m->relative_rate() >= 1.0)
@@ -307,7 +308,11 @@ gr_block_executor::run_one_iteration()
       // try to work it forward starting with max_items_avail.
       // We want to try to consume all the input we've got.
       int reqd_noutput_items = m->fixed_rate_ninput_to_noutput(max_items_avail);
-      reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple());
+      
+      // only test this if we specifically set the output_multiple
+      if(m->output_multiple_set())
+	reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple());
+
       if (reqd_noutput_items > 0 && reqd_noutput_items <= noutput_items)
 	noutput_items = reqd_noutput_items;
 
@@ -316,6 +321,30 @@ gr_block_executor::run_one_iteration()
     }
     noutput_items = std::min(noutput_items, max_noutput_items);
 
+    // Check if we're still unaligned; only use up items until we're
+    // aligned again. Otherwise, make sure we set the alignment
+    // requirement.
+    if(m->is_unaligned()) {
+      if(noutput_items >= m->unaligned()) {
+	noutput_items = round_up(noutput_items, m->alignment()) \
+	  - (m->alignment() - m->unaligned());
+	new_alignment = 0;
+      }
+      else {
+	new_alignment = m->unaligned() - noutput_items;
+      }
+    }
+    else if(noutput_items < m->alignment()) {
+      //m->set_unaligned(m->alignment());
+      new_alignment = m->alignment() - noutput_items;
+      m->set_unaligned(new_alignment);
+      m->set_is_unaligned(true);
+    }
+    else {
+      noutput_items = round_down(noutput_items, m->alignment());
+      m->set_is_unaligned(false);
+    }
+
     // ask the block how much input they need to produce noutput_items
     m->forecast (noutput_items, d_ninput_items_required);
 
@@ -379,6 +408,12 @@ gr_block_executor::run_one_iteration()
     LOG(*d_log << "  general_work: noutput_items = " << noutput_items
 	<< " result = " << n << std::endl);
 
+    // Adjust number of unaligned items left to process
+    if(m->is_unaligned()) {
+      m->set_unaligned(new_alignment);
+      m->set_is_unaligned(m->unaligned() != 0);
+    }
+
     if(!propagate_tags(m->tag_propagation_policy(), d,
 		       d_start_nitems_read, m->relative_rate(),
 		       d_returned_tags))
-- 
cgit 


From 32cbee3e091021741bb2bcf0d4a8bddd6581c8ab Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 22 Jan 2012 17:46:11 -0500
Subject: core: link to Volk.

---
 gnuradio-core/src/lib/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gnuradio-core/src/lib/CMakeLists.txt b/gnuradio-core/src/lib/CMakeLists.txt
index 52339fc6c..b2b63e270 100644
--- a/gnuradio-core/src/lib/CMakeLists.txt
+++ b/gnuradio-core/src/lib/CMakeLists.txt
@@ -68,6 +68,9 @@ if(LINUX)
     list(APPEND gnuradio_core_libs rt)
 endif()
 
+# Link against libvolk
+list(APPEND gnuradio_core_libs volk)
+
 add_library(gnuradio-core SHARED ${gnuradio_core_sources})
 target_link_libraries(gnuradio-core ${gnuradio_core_libs})
 GR_LIBRARY_FOO(gnuradio-core RUNTIME_COMPONENT "core_runtime" DEVEL_COMPONENT "core_devel")
-- 
cgit 


From accc35f380e56af156153cd01540a46c7eb98f12 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 22 Jan 2012 17:46:22 -0500
Subject: core: test case of using Volk to convert from float to short.

---
 gnuradio-core/src/lib/general/gr_float_to_short.cc | 24 ++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc
index 084f76f9c..415ea6982 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc
@@ -27,6 +27,9 @@
 #include <gr_float_to_short.h>
 #include <gr_io_signature.h>
 #include <gri_float_to_short.h>
+#include <volk/volk.h>
+
+#include <iostream>
 
 gr_float_to_short_sptr
 gr_make_float_to_short ()
@@ -39,6 +42,9 @@ gr_float_to_short::gr_float_to_short ()
 		   gr_make_io_signature (1, 1, sizeof (float)),
 		   gr_make_io_signature (1, 1, sizeof (short)))
 {
+ const int alignment_multiple =
+   volk_get_alignment() / sizeof(short);
+ set_alignment(alignment_multiple);
 }
 
 int
@@ -49,8 +55,22 @@ gr_float_to_short::work (int noutput_items,
   const float *in = (const float *) input_items[0];
   short *out = (short *) output_items[0];
 
-  gri_float_to_short (in, out, noutput_items);
-  
+#if 0
+  if(is_unaligned()) {
+    float d_scale = 1.0;
+    //gri_float_to_short (in, out, noutput_items);
+    volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
+  }
+  else {
+    float d_scale = 1.0;
+    volk_32f_s32f_convert_16i_a(out, in, d_scale, noutput_items);
+  }
+#else
+  float d_scale = 1.0;
+  volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
+  //gri_float_to_short (in, out, noutput_items);
+#endif
+
   return noutput_items;
 }
 
-- 
cgit 


From 06f40e8dbef0b082e23703bc22775ec3458faf5b Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 22 Jan 2012 17:51:13 -0500
Subject: sched: forgot to turn debugging off.

---
 gnuradio-core/src/lib/runtime/gr_block_executor.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index 1c52c0e13..350518e4c 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -36,7 +36,7 @@
 #include <stdio.h>
 
 // must be defined to either 0 or 1
-#define ENABLE_LOGGING 1
+#define ENABLE_LOGGING 0
 
 #if (ENABLE_LOGGING)
 #define LOG(x) do { x; } while(0)
-- 
cgit 


From 97f6e8152646c05b570671231d2bab428696aa89 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 23 Jan 2012 16:40:36 -0500
Subject: build: look for local volk headers instead of installed.

---
 gnuradio-core/src/lib/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gnuradio-core/src/lib/CMakeLists.txt b/gnuradio-core/src/lib/CMakeLists.txt
index b2b63e270..f9756feba 100644
--- a/gnuradio-core/src/lib/CMakeLists.txt
+++ b/gnuradio-core/src/lib/CMakeLists.txt
@@ -42,6 +42,7 @@ list(APPEND test_gnuradio_core_sources bug_work_around_6.cc)
 # Setup the include and linker paths
 ########################################################################
 include_directories(${GNURADIO_CORE_INCLUDE_DIRS})
+include_directories(${VOLK_INCLUDE_DIRS})
 
 include_directories(${Boost_INCLUDE_DIRS})
 link_directories(${Boost_LIBRARY_DIRS})
-- 
cgit 


From 09be95bb8da5f41aff2fd2207fcdb6bb8f92c8cf Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 23 Jan 2012 16:41:32 -0500
Subject: sched: better comments. Handling of noutput_items adjustment done
 better and documented.

---
 gnuradio-core/src/lib/runtime/gr_block_executor.cc | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index 350518e4c..3191246a7 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -36,7 +36,7 @@
 #include <stdio.h>
 
 // must be defined to either 0 or 1
-#define ENABLE_LOGGING 0
+#define ENABLE_LOGGING 1
 
 #if (ENABLE_LOGGING)
 #define LOG(x) do { x; } while(0)
@@ -83,6 +83,11 @@ min_available_space (gr_block_detail *d, int output_multiple)
       }
       return 0;
     }
+    else if (n > output_multiple) {
+      // adjust this or we often ask for too many, 
+      // causing a re-calc for fewer items.
+      n = n-output_multiple;
+    }
     min_space = std::min (min_space, n);
   }
   return min_space;
@@ -285,7 +290,7 @@ gr_block_executor::run_one_iteration()
     }
 
     // determine the minimum available output space
-    noutput_items = min_available_space (d, m->output_multiple ()) - m->output_multiple();
+    noutput_items = min_available_space (d, m->output_multiple ());
     if (ENABLE_LOGGING){
       *d_log << " regular ";
       if (m->relative_rate() >= 1.0)
@@ -321,10 +326,14 @@ gr_block_executor::run_one_iteration()
     }
     noutput_items = std::min(noutput_items, max_noutput_items);
 
-    // Check if we're still unaligned; only use up items until we're
+    // Check if we're still unaligned; use up items until we're
     // aligned again. Otherwise, make sure we set the alignment
     // requirement.
     if(m->is_unaligned()) {
+      // When unaligned, don't just set noutput_items to the remaining
+      // samples to meet alignment; this causes too much overhead in
+      // requiring a premature call back here. Set the maximum amount
+      // of samples to handle unalignment and get us back aligned.
       if(noutput_items >= m->unaligned()) {
 	noutput_items = round_up(noutput_items, m->alignment()) \
 	  - (m->alignment() - m->unaligned());
@@ -335,12 +344,14 @@ gr_block_executor::run_one_iteration()
       }
     }
     else if(noutput_items < m->alignment()) {
-      //m->set_unaligned(m->alignment());
+      // if we don't have enough for an aligned call, keep track of
+      // misalignment, set unaligned flag, and proceed.
       new_alignment = m->alignment() - noutput_items;
       m->set_unaligned(new_alignment);
       m->set_is_unaligned(true);
     }
     else {
+      // enough to round down to the nearest alignment and process.
       noutput_items = round_down(noutput_items, m->alignment());
       m->set_is_unaligned(false);
     }
-- 
cgit 


From d825bb2bc1c7bc4105723ae5f699f283bc4f3c44 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 26 Jan 2012 17:53:49 -0500
Subject: volk: float_to_short now clips the values instead of wrapping around.

---
 volk/include/volk/volk_32f_s32f_convert_16i_a.h | 52 +++++++++++++++++++++----
 volk/include/volk/volk_32f_s32f_convert_16i_u.h | 52 +++++++++++++++++++++----
 2 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
index 0a2b4f0f2..10c921b08 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
@@ -21,17 +21,29 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
+
+  float min_val = -32768;
+  float max_val = 32767;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 inputVal1, inputVal2;
   __m128i intInputVal1, intInputVal2;
+  __m128 ret1, ret2;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   for(;number < eighthPoints; number++){
     inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
     inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
 
-    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
-    intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
-    
+    // Scale and clip
+    ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+
+    intInputVal1 = _mm_cvtps_epi32(ret1);
+    intInputVal2 = _mm_cvtps_epi32(ret2);
+
     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
 
     _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
@@ -40,7 +52,12 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
 
   number = eighthPoints * 8;    
   for(; number < num_points; number++){
-    *outputVectorPtr++ = (int16_t)(*inputVectorPtr++ * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -61,8 +78,15 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
+
+  float min_val = -32768;
+  float max_val = 32767;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
@@ -70,7 +94,8 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
     ret = _mm_load_ps(inputVectorPtr);
     inputVectorPtr += 4;
 
-    ret = _mm_mul_ps(ret, vScalar);
+    // Scale and clip
+    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
     *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
@@ -81,7 +106,12 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    *outputVectorPtr++ = (int16_t)(*inputVectorPtr++ * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -98,9 +128,17 @@ static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, co
   int16_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
+  float min_val = -32768;
+  float max_val = 32767;
+  float r;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++ * scalar));
+    r  = *inputVectorPtr++ * scalar;
+    if(r < min_val)
+      r = min_val;
+    else if(r > max_val)
+      r = max_val;
+    *outputVectorPtr++ = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
index dec3f1611..f339a7d10 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
@@ -21,17 +21,29 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
+
+  float min_val = -32768;
+  float max_val = 32767;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 inputVal1, inputVal2;
   __m128i intInputVal1, intInputVal2;
+  __m128 ret1, ret2;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   for(;number < eighthPoints; number++){
     inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
     inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
 
-    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
-    intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
-    
+    // Scale and clip
+    ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+
+    intInputVal1 = _mm_cvtps_epi32(ret1);
+    intInputVal2 = _mm_cvtps_epi32(ret2);
+
     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
 
     _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
@@ -40,7 +52,12 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
 
   number = eighthPoints * 8;    
   for(; number < num_points; number++){
-    outputVector[number] = (int16_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -62,8 +79,15 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int16_t* outputVectorPtr = outputVector;
+
+  float min_val = -32768;
+  float max_val = 32767;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
@@ -71,7 +95,8 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
     ret = _mm_loadu_ps(inputVectorPtr);
     inputVectorPtr += 4;
 
-    ret = _mm_mul_ps(ret, vScalar);
+    // Scale and clip
+    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
     *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
@@ -82,7 +107,12 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int16_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -100,9 +130,17 @@ static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, co
   int16_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
+  float min_val = -32768;
+  float max_val = 32767;
+  float r;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++  * scalar));
+    r = *inputVectorPtr++  * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    *outputVectorPtr++ = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
-- 
cgit 


From d8b02979cef097971bc0656b904f7b51d19b03c9 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 26 Jan 2012 20:07:26 -0500
Subject: volk: fix a warning.

---
 volk/include/volk/volk_64u_popcnt_a.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/volk/include/volk/volk_64u_popcnt_a.h b/volk/include/volk/volk_64u_popcnt_a.h
index bdaa98643..4683f1e38 100644
--- a/volk/include/volk/volk_64u_popcnt_a.h
+++ b/volk/include/volk/volk_64u_popcnt_a.h
@@ -10,10 +10,11 @@
 
 static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) {
 
-  const uint32_t* valueVector = (const uint32_t*)&value;
+  //const uint32_t* valueVector = (const uint32_t*)&value;
   
   // This is faster than a lookup table
-  uint32_t retVal = valueVector[0];
+  //uint32_t retVal = valueVector[0];
+  uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFF); // low 32 bits; bitwise mask ('&&' would yield only 0 or 1)
 
   retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
   retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
@@ -22,7 +23,8 @@ static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value
   retVal = (retVal + (retVal >> 16)) & 0x0000003F;
   uint64_t retVal64  = retVal;
 
-  retVal = valueVector[1];
+  //retVal = valueVector[1];
+  retVal = (uint32_t)((value & 0xFFFFFFFF00000000) >> 32); // high 32 bits; a shift of 31 would drop bit 63
   retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
   retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
   retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
-- 
cgit 


From 42d9560a50bbbab143b48bda5a73a5379818ddbe Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 26 Jan 2012 20:07:51 -0500
Subject: volk: float_to_int and float_to_char updated to clip instead of wrap
 around. The float to int clips at smaller than 2^32 because of the limits of
 the float representation.

---
 volk/include/volk/volk_32f_s32f_convert_32i_a.h | 60 ++++++++++++++++++++++---
 volk/include/volk/volk_32f_s32f_convert_32i_u.h | 45 ++++++++++++++++---
 volk/include/volk/volk_32f_s32f_convert_8i_a.h  | 53 ++++++++++++++++++----
 volk/include/volk/volk_32f_s32f_convert_8i_u.h  | 53 ++++++++++++++++++----
 4 files changed, 183 insertions(+), 28 deletions(-)

diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
index aa370e614..15fa282fb 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
@@ -21,14 +21,22 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
+
+  float min_val = -2147483647;
+  float max_val = 2147483647;
+  float r;
+
   __m256 vScalar = _mm256_set1_ps(scalar);
   __m256 inputVal1;
   __m256i intInputVal1;
+  __m256 vmin_val = _mm256_set1_ps(min_val);
+  __m256 vmax_val = _mm256_set1_ps(max_val);
 
   for(;number < eighthPoints; number++){
     inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
 
-    intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar));
+    inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    intInputVal1 = _mm256_cvtps_epi32(inputVal1);
 
     _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
     outputVectorPtr += 8;
@@ -36,7 +44,12 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
 
   number = eighthPoints * 8;    
   for(; number < num_points; number++){
-    outputVector[number] = (int32_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_AVX */
@@ -57,14 +70,22 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
+
+  float min_val = -2147483647;
+  float max_val = 2147483647;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 inputVal1;
   __m128i intInputVal1;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   for(;number < quarterPoints; number++){
     inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
 
-    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
+    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    intInputVal1 = _mm_cvtps_epi32(inputVal1);
 
     _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
     outputVectorPtr += 4;
@@ -72,7 +93,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int32_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -93,8 +119,15 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
+
+  float min_val = -2147483647;
+  float max_val = 2147483647;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
@@ -102,7 +135,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
     ret = _mm_load_ps(inputVectorPtr);
     inputVectorPtr += 4;
 
-    ret = _mm_mul_ps(ret, vScalar);
+    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
@@ -113,7 +146,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int32_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -130,9 +168,17 @@ static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, co
   int32_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
+  float min_val = -2147483647;
+  float max_val = 2147483647;
+  float r;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++  * scalar));
+    r = *inputVectorPtr++ * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    *outputVectorPtr++ = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
index b4e954dc4..d8493454b 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
@@ -21,14 +21,24 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
+
+  //float min_val = -2147483647;
+  //float max_val = 2147483647;
+  float min_val = -2146400000;
+  float max_val = 2146400000;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 inputVal1;
   __m128i intInputVal1;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   for(;number < quarterPoints; number++){
     inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
 
-    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
+    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    intInputVal1 = _mm_cvtps_epi32(inputVal1);
 
     _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
     outputVectorPtr += 4;
@@ -36,7 +46,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int32_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -58,8 +73,15 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
     
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
+
+  float min_val = -2147483647;
+  float max_val = 2147483647;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
@@ -67,7 +89,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
     ret = _mm_loadu_ps(inputVectorPtr);
     inputVectorPtr += 4;
 
-    ret = _mm_mul_ps(ret, vScalar);
+    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
@@ -78,7 +100,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int32_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -96,9 +123,17 @@ static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, co
   int32_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
+  float min_val = -2147483647;
+  float max_val = 2147483647;
+  float r;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++  * scalar));
+    r = *inputVectorPtr++ * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    *outputVectorPtr++ = (int32_t)(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
index 8d87a07d7..05172171c 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
@@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
     
   const float* inputVectorPtr = (const float*)inputVector;
   int8_t* outputVectorPtr = outputVector;
+
+  float min_val = -128;
+  float max_val = 127;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 inputVal1, inputVal2, inputVal3, inputVal4;
   __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   for(;number < sixteenthPoints; number++){
     inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
@@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
     inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
     inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
 
-    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
-    intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
-    intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar));
-    intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar));
+    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+    inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
+    inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
+
+    intInputVal1 = _mm_cvtps_epi32(inputVal1);
+    intInputVal2 = _mm_cvtps_epi32(inputVal2);
+    intInputVal3 = _mm_cvtps_epi32(inputVal3);
+    intInputVal4 = _mm_cvtps_epi32(inputVal4);
     
     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
     intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
@@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
 
   number = sixteenthPoints * 16;    
   for(; number < num_points; number++){
-    outputVector[number] = (int8_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int8_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -67,9 +84,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
   const unsigned int quarterPoints = num_points / 4;
     
   const float* inputVectorPtr = (const float*)inputVector;
+
+  float min_val = -128;
+  float max_val = 127;
+  float r;
+
   int8_t* outputVectorPtr = outputVector;
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
@@ -77,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
     ret = _mm_load_ps(inputVectorPtr);
     inputVectorPtr += 4;
 
-    ret = _mm_mul_ps(ret, vScalar);
+    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
@@ -88,7 +112,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int8_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int8_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -105,9 +134,17 @@ static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, cons
   int8_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
+  float min_val = -128;
+  float max_val = 127;
+  float r;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = (int8_t)(*inputVectorPtr++  * scalar);
+    r = *inputVectorPtr++ * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    *outputVectorPtr++ = (int8_t)(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
index 1c6bf87c9..12991e9c1 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
@@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
     
   const float* inputVectorPtr = (const float*)inputVector;
   int8_t* outputVectorPtr = outputVector;
+
+  float min_val = -128;
+  float max_val = 127;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 inputVal1, inputVal2, inputVal3, inputVal4;
   __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   for(;number < sixteenthPoints; number++){
     inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
@@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
     inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
     inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
 
-    intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
-    intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
-    intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar));
-    intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar));
+    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+    inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
+    inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
+
+    intInputVal1 = _mm_cvtps_epi32(inputVal1);
+    intInputVal2 = _mm_cvtps_epi32(inputVal2);
+    intInputVal3 = _mm_cvtps_epi32(inputVal3);
+    intInputVal4 = _mm_cvtps_epi32(inputVal4);
     
     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
     intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
@@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
 
   number = sixteenthPoints * 16;    
   for(; number < num_points; number++){
-    outputVector[number] = (int8_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -69,8 +86,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
     
   const float* inputVectorPtr = (const float*)inputVector;
   int8_t* outputVectorPtr = outputVector;
+
+  float min_val = -128;
+  float max_val = 127;
+  float r;
+
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
+  __m128 vmin_val = _mm_set_ps1(min_val);
+  __m128 vmax_val = _mm_set_ps1(max_val);
 
   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
@@ -78,7 +102,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
     ret = _mm_loadu_ps(inputVectorPtr);
     inputVectorPtr += 4;
 
-    ret = _mm_mul_ps(ret, vScalar);
+    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
@@ -89,7 +113,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
 
   number = quarterPoints * 4;    
   for(; number < num_points; number++){
-    outputVector[number] = (int8_t)(inputVector[number] * scalar);
+    r = inputVector[number] * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    outputVector[number] = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -107,9 +136,17 @@ static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, cons
   int8_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
+  float min_val = -128;
+  float max_val = 127;
+  float r;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++  * scalar));
+    r = *inputVectorPtr++ * scalar;
+    if(r > max_val)
+      r = max_val;
+    else if(r < min_val)
+      r = min_val;
+    *outputVectorPtr++ = (int16_t)(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
-- 
cgit 


From 3737aa4b86af3b489ef3b91d8580bcd893295042 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 26 Jan 2012 20:20:49 -0500
Subject: core: moved float_to_X type converters over to use Volk calls.

---
 gnuradio-core/src/lib/general/gr_float_to_char.cc  | 18 +++++++++++++++++-
 gnuradio-core/src/lib/general/gr_float_to_int.cc   | 18 +++++++++++++++++-
 gnuradio-core/src/lib/general/gr_float_to_short.cc | 13 ++++---------
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc
index 88b9d276e..2d7e21f2e 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc
@@ -27,6 +27,7 @@
 #include <gr_float_to_char.h>
 #include <gr_io_signature.h>
 #include <gri_float_to_char.h>
+#include <volk/volk.h>
 
 gr_float_to_char_sptr
 gr_make_float_to_char ()
@@ -39,6 +40,9 @@ gr_float_to_char::gr_float_to_char ()
 		   gr_make_io_signature (1, 1, sizeof (float)),
 		   gr_make_io_signature (1, 1, sizeof (char)))
 {
+ const int alignment_multiple =
+   volk_get_alignment() / sizeof(char);
+ set_alignment(alignment_multiple);
 }
 
 int
@@ -46,10 +50,22 @@ gr_float_to_char::work (int noutput_items,
 			 gr_vector_const_void_star &input_items,
 			 gr_vector_void_star &output_items)
 {
+#if 1
+  float d_scale = 1.0;
   const float *in = (const float *) input_items[0];
-  char *out = (char *) output_items[0];
+  int8_t *out = (int8_t *) output_items[0];
 
+  if(is_unaligned()) {
+    volk_32f_s32f_convert_8i_u(out, in, d_scale, noutput_items);
+  }
+  else {
+    volk_32f_s32f_convert_8i_a(out, in, d_scale, noutput_items);
+  }
+#else
+  const float *in = (const float *) input_items[0];
+  char *out = (char *) output_items[0];
   gri_float_to_char (in, out, noutput_items);
+#endif
   
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc
index 2349de8cb..2ca723c7c 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc
@@ -27,6 +27,7 @@
 #include <gr_float_to_int.h>
 #include <gr_io_signature.h>
 #include <gri_float_to_int.h>
+#include <volk/volk.h>
 
 gr_float_to_int_sptr
 gr_make_float_to_int ()
@@ -39,6 +40,9 @@ gr_float_to_int::gr_float_to_int ()
 		   gr_make_io_signature (1, 1, sizeof (float)),
 		   gr_make_io_signature (1, 1, sizeof (int)))
 {
+ const int alignment_multiple =
+   volk_get_alignment() / sizeof(int);
+ set_alignment(alignment_multiple);
 }
 
 int
@@ -46,10 +50,22 @@ gr_float_to_int::work (int noutput_items,
 			 gr_vector_const_void_star &input_items,
 			 gr_vector_void_star &output_items)
 {
+#if 1
+  float d_scale = 1.0;
   const float *in = (const float *) input_items[0];
-  int *out = (int *) output_items[0];
+  int32_t *out = (int32_t *) output_items[0];
 
+  if(is_unaligned()) {
+    volk_32f_s32f_convert_32i_u(out, in, d_scale, noutput_items);
+  }
+  else {
+    volk_32f_s32f_convert_32i_a(out, in, d_scale, noutput_items);
+  }
+#else
+  const float *in = (const float *) input_items[0];
+  int *out = (int *) output_items[0];
   gri_float_to_int (in, out, noutput_items);
+#endif
   
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc
index 415ea6982..6c4d031ac 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc
@@ -29,8 +29,6 @@
 #include <gri_float_to_short.h>
 #include <volk/volk.h>
 
-#include <iostream>
-
 gr_float_to_short_sptr
 gr_make_float_to_short ()
 {
@@ -55,20 +53,17 @@ gr_float_to_short::work (int noutput_items,
   const float *in = (const float *) input_items[0];
   short *out = (short *) output_items[0];
 
-#if 0
+#if 1
+  float d_scale = 1.0;
+  //volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
   if(is_unaligned()) {
-    float d_scale = 1.0;
-    //gri_float_to_short (in, out, noutput_items);
     volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
   }
   else {
-    float d_scale = 1.0;
     volk_32f_s32f_convert_16i_a(out, in, d_scale, noutput_items);
   }
 #else
-  float d_scale = 1.0;
-  volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
-  //gri_float_to_short (in, out, noutput_items);
+  gri_float_to_short (in, out, noutput_items);
 #endif
 
   return noutput_items;
-- 
cgit 


From 2a2663d625847217a237acba0229738a81003eef Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 26 Jan 2012 20:22:10 -0500
Subject: QA: type converter QA codes added and updated for Volk. The expected
 results account for small rounding differences between Volk and non-Volk code.

---
 gnuradio-core/src/python/gnuradio/gr/Makefile.am   |  2 +
 .../src/python/gnuradio/gr/qa_float_to_char.py     | 64 +++++++++++++++++++
 .../src/python/gnuradio/gr/qa_float_to_int.py      | 12 ++--
 .../src/python/gnuradio/gr/qa_float_to_short.py    | 71 ++++++++++++++++++++++
 .../src/python/gnuradio/gr/qa_float_to_uchar.py    | 64 +++++++++++++++++++
 5 files changed, 209 insertions(+), 4 deletions(-)
 create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
 mode change 100644 => 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
 create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
 create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py

diff --git a/gnuradio-core/src/python/gnuradio/gr/Makefile.am b/gnuradio-core/src/python/gnuradio/gr/Makefile.am
index f5af80c78..16dd14790 100644
--- a/gnuradio-core/src/python/gnuradio/gr/Makefile.am
+++ b/gnuradio-core/src/python/gnuradio/gr/Makefile.am
@@ -60,7 +60,9 @@ noinst_PYTHON = 			\
 	qa_fft.py			\
 	qa_fft_filter.py		\
 	qa_filter_delay_fc.py		\
+	qa_float_to_char.py		\
 	qa_float_to_int.py		\
+	qa_float_to_short.py		\
 	qa_fractional_interpolator.py   \
 	qa_frequency_modulator.py	\
 	qa_fsk_stuff.py			\
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
new file mode 100755
index 000000000..45df71d0a
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+class test_float_to_char (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_001(self):
+
+        src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+        expected_result = [0, 1, 2, 3, 4, 5, 255, 254, 253]
+        src = gr.vector_source_f(src_data)
+        op = gr.float_to_char()
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+    def test_002(self):
+
+        src_data = ( 126.0, 127.0, 128.0)
+        expected_result = [ 126, 127, 127 ]
+
+        src = gr.vector_source_f(src_data)
+        op = gr.float_to_char()
+        # Note: vector_sink_b returns uchar
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_float_to_char, "test_float_to_char.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
old mode 100644
new mode 100755
index 3e0b847a2..4cc3d0056
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
@@ -34,6 +34,10 @@ class test_float_to_int (gr_unittest.TestCase):
 
         src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
         expected_result = [int(round(s)) for s in src_data]
+        
+        ### Volk results
+        expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5]
+
         src = gr.vector_source_f(src_data)
         op = gr.float_to_int()
         dst = gr.vector_sink_i()
@@ -46,10 +50,10 @@ class test_float_to_int (gr_unittest.TestCase):
 
     def test_002(self):
 
-        src_data = ( 2147483647,  2147483648,  2200000000,
-                    -2147483648, -2147483649, -2200000000)
-        expected_result = [ 2147483647,  2147483647,  2147483647,
-                           -2147483647, -2147483647, -2147483647]
+        src_data = (  2146400000,   2147483647,
+                     -2146400000,  -2147483648 )
+        expected_result = [  2146400000,   2146400000,
+                            -2146400000,  -2146400000 ]
         src = gr.vector_source_f(src_data)
         op = gr.float_to_int()
         dst = gr.vector_sink_i()
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
new file mode 100755
index 000000000..aa26668c8
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_float_to_short (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_001(self):
+
+        src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
+        expected_result = [int(round(s)) for s in src_data]
+
+        ### Volk results
+        expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5]
+
+        src = gr.vector_source_f(src_data)
+        op = gr.float_to_short()
+        dst = gr.vector_sink_s()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+    def test_002(self):
+
+        src_data = ( 32766,  32767,  32768,
+                    -32767, -32768, -32769)
+        expected_result = [ 32766,  32767,  32767,
+                           -32767, -32768, -32768 ]
+
+        src = gr.vector_source_f(src_data)
+        op = gr.float_to_short()
+        dst = gr.vector_sink_s()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_float_to_short, "test_float_to_short.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py
new file mode 100755
index 000000000..0d54f45f3
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_float_to_uchar (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_001(self):
+
+        src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
+        expected_result = [0, 1, 2, 3, 4, 6, 0, 0, 0, 0, 0]
+        src = gr.vector_source_f(src_data)
+        op = gr.float_to_uchar()
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+    def test_002(self):
+
+        src_data = ( 254.0,  255.0, 256.0)
+        expected_result = [ 254, 255, 255 ]
+        src = gr.vector_source_f(src_data)
+        op = gr.float_to_uchar()
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_float_to_uchar, "test_float_to_uchar.xml")
+
-- 
cgit 


From 8f09eb204213492091fa8310b165c8ddb89b2801 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 12:49:00 -0500
Subject: volk: cleaned up float_to_X to be Volk-only and added vlen and scale
 to the constructor to set the vector length and get free scaling of the data.

---
 gnuradio-core/src/lib/general/gr_float_to_char.cc  | 42 +++++++++++---------
 gnuradio-core/src/lib/general/gr_float_to_char.h   | 16 ++++++--
 gnuradio-core/src/lib/general/gr_float_to_char.i   | 10 +++--
 gnuradio-core/src/lib/general/gr_float_to_int.cc   | 46 ++++++++++++----------
 gnuradio-core/src/lib/general/gr_float_to_int.h    | 16 ++++++--
 gnuradio-core/src/lib/general/gr_float_to_int.i    | 10 +++--
 gnuradio-core/src/lib/general/gr_float_to_short.cc | 41 +++++++++++--------
 gnuradio-core/src/lib/general/gr_float_to_short.h  | 15 +++++--
 gnuradio-core/src/lib/general/gr_float_to_short.i  | 10 +++--
 9 files changed, 130 insertions(+), 76 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc
index 2d7e21f2e..165172ac6 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2010,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,19 +30,32 @@
 #include <volk/volk.h>
 
 gr_float_to_char_sptr
-gr_make_float_to_char ()
+gr_make_float_to_char (size_t vlen, float scale)
 {
-  return gnuradio::get_initial_sptr(new gr_float_to_char ());
+  return gnuradio::get_initial_sptr(new gr_float_to_char (vlen, scale));
 }
 
-gr_float_to_char::gr_float_to_char ()
+gr_float_to_char::gr_float_to_char (size_t vlen, float scale)
   : gr_sync_block ("gr_float_to_char",
-		   gr_make_io_signature (1, 1, sizeof (float)),
-		   gr_make_io_signature (1, 1, sizeof (char)))
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (char)*vlen)),
+    d_vlen(vlen), d_scale(scale)
 {
- const int alignment_multiple =
-   volk_get_alignment() / sizeof(char);
- set_alignment(alignment_multiple);
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(char);
+  set_alignment(alignment_multiple);
+}
+
+float 
+gr_float_to_char::scale() const
+{
+  return d_scale;
+}
+
+void
+gr_float_to_char::set_scale(float scale)
+{
+  d_scale = scale;
 }
 
 int
@@ -50,22 +63,15 @@ gr_float_to_char::work (int noutput_items,
 			 gr_vector_const_void_star &input_items,
 			 gr_vector_void_star &output_items)
 {
-#if 1
-  float d_scale = 1.0;
   const float *in = (const float *) input_items[0];
   int8_t *out = (int8_t *) output_items[0];
 
   if(is_unaligned()) {
-    volk_32f_s32f_convert_8i_u(out, in, d_scale, noutput_items);
+    volk_32f_s32f_convert_8i_u(out, in, d_scale, d_vlen*noutput_items);
   }
   else {
-    volk_32f_s32f_convert_8i_a(out, in, d_scale, noutput_items);
+    volk_32f_s32f_convert_8i_a(out, in, d_scale, d_vlen*noutput_items);
   }
-#else
-  const float *in = (const float *) input_items[0];
-  char *out = (char *) output_items[0];
-  gri_float_to_char (in, out, noutput_items);
-#endif
   
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.h b/gnuradio-core/src/lib/general/gr_float_to_char.h
index 434e2e9d0..c88645a18 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.h
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.h
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,7 +30,7 @@ class gr_float_to_char;
 typedef boost::shared_ptr<gr_float_to_char> gr_float_to_char_sptr;
 
 GR_CORE_API gr_float_to_char_sptr
-gr_make_float_to_char ();
+gr_make_float_to_char (size_t vlen=1, float scale=1);
 
 /*!
  * \brief Convert stream of float to a stream of char
@@ -39,10 +39,18 @@ gr_make_float_to_char ();
 
 class GR_CORE_API gr_float_to_char : public gr_sync_block
 {
-  friend GR_CORE_API gr_float_to_char_sptr gr_make_float_to_char ();
-  gr_float_to_char ();
+ private:
+  friend GR_CORE_API gr_float_to_char_sptr gr_make_float_to_char 
+    (size_t vlen, float scale);
+  gr_float_to_char (size_t vlen, float scale);
+
+  size_t d_vlen;
+  float d_scale;
 
  public:
+  float scale() const;
+  void set_scale(float scale);
+
   virtual int work (int noutput_items,
 		    gr_vector_const_void_star &input_items,
 		    gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.i b/gnuradio-core/src/lib/general/gr_float_to_char.i
index 05b206554..05939f1b1 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.i
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -22,9 +22,13 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,float_to_char)
 
-gr_float_to_char_sptr gr_make_float_to_char ();
+gr_float_to_char_sptr
+gr_make_float_to_char (size_t vlen=1, float scale=1);
 
 class gr_float_to_char : public gr_sync_block
 {
-  gr_float_to_char ();
+public:
+  float scale() const;
+  void set_scale(float scale);
+  gr_float_to_char (size_t vlen, float scale);
 };
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc
index 2ca723c7c..c4d9991af 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,42 +30,48 @@
 #include <volk/volk.h>
 
 gr_float_to_int_sptr
-gr_make_float_to_int ()
+gr_make_float_to_int (size_t vlen, float scale)
 {
-  return gnuradio::get_initial_sptr(new gr_float_to_int ());
+  return gnuradio::get_initial_sptr(new gr_float_to_int (vlen, scale));
 }
 
-gr_float_to_int::gr_float_to_int ()
+gr_float_to_int::gr_float_to_int (size_t vlen, float scale)
   : gr_sync_block ("gr_float_to_int",
-		   gr_make_io_signature (1, 1, sizeof (float)),
-		   gr_make_io_signature (1, 1, sizeof (int)))
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (int)*vlen)),
+    d_vlen(vlen), d_scale(scale)
 {
- const int alignment_multiple =
-   volk_get_alignment() / sizeof(int);
- set_alignment(alignment_multiple);
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(int);
+  set_alignment(alignment_multiple);
+}
+
+float 
+gr_float_to_int::scale() const
+{
+  return d_scale;
+}
+
+void
+gr_float_to_int::set_scale(float scale)
+{
+  d_scale = scale;
 }
 
 int
 gr_float_to_int::work (int noutput_items,
-			 gr_vector_const_void_star &input_items,
-			 gr_vector_void_star &output_items)
+		       gr_vector_const_void_star &input_items,
+		       gr_vector_void_star &output_items)
 {
-#if 1
-  float d_scale = 1.0;
   const float *in = (const float *) input_items[0];
   int32_t *out = (int32_t *) output_items[0];
 
   if(is_unaligned()) {
-    volk_32f_s32f_convert_32i_u(out, in, d_scale, noutput_items);
+    volk_32f_s32f_convert_32i_u(out, in, d_scale, d_vlen*noutput_items);
   }
   else {
-    volk_32f_s32f_convert_32i_a(out, in, d_scale, noutput_items);
+    volk_32f_s32f_convert_32i_a(out, in, d_scale, d_vlen*noutput_items);
   }
-#else
-  const float *in = (const float *) input_items[0];
-  int *out = (int *) output_items[0];
-  gri_float_to_int (in, out, noutput_items);
-#endif
   
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.h b/gnuradio-core/src/lib/general/gr_float_to_int.h
index 3324ed110..0b42c0aab 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.h
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.h
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,7 +30,7 @@ class gr_float_to_int;
 typedef boost::shared_ptr<gr_float_to_int> gr_float_to_int_sptr;
 
 GR_CORE_API gr_float_to_int_sptr
-gr_make_float_to_int ();
+gr_make_float_to_int (size_t vlen=1, float scale=1);
 
 /*!
  * \brief Convert stream of float to a stream of short
@@ -39,10 +39,18 @@ gr_make_float_to_int ();
 
 class GR_CORE_API gr_float_to_int : public gr_sync_block
 {
-  friend GR_CORE_API gr_float_to_int_sptr gr_make_float_to_int ();
-  gr_float_to_int ();
+ private:
+  friend GR_CORE_API
+    gr_float_to_int_sptr gr_make_float_to_int (size_t vlen, float scale);
+  gr_float_to_int (size_t vlen, float scale);
+
+  size_t d_vlen;
+  float d_scale;
 
  public:
+  float scale() const;
+  void set_scale(float scale);
+
   virtual int work (int noutput_items,
 		    gr_vector_const_void_star &input_items,
 		    gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.i b/gnuradio-core/src/lib/general/gr_float_to_int.i
index 4ab04cbf2..a0b52a3c1 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.i
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -22,9 +22,13 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,float_to_int)
 
-gr_float_to_int_sptr gr_make_float_to_int ();
+gr_float_to_int_sptr 
+gr_make_float_to_int (size_t vlen=1, float scale=1);
 
 class gr_float_to_int : public gr_sync_block
 {
-  gr_float_to_int ();
+public:
+  float scale() const;
+  void set_scale(float scale);
+  gr_float_to_int (size_t vlen, float scale);
 };
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc
index 6c4d031ac..e6ac5c184 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2010,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,19 +30,32 @@
 #include <volk/volk.h>
 
 gr_float_to_short_sptr
-gr_make_float_to_short ()
+gr_make_float_to_short (size_t vlen, float scale)
 {
-  return gnuradio::get_initial_sptr(new gr_float_to_short ());
+  return gnuradio::get_initial_sptr(new gr_float_to_short (vlen, scale));
 }
 
-gr_float_to_short::gr_float_to_short ()
+gr_float_to_short::gr_float_to_short (size_t vlen, float scale)
   : gr_sync_block ("gr_float_to_short",
-		   gr_make_io_signature (1, 1, sizeof (float)),
-		   gr_make_io_signature (1, 1, sizeof (short)))
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (short)*vlen)),
+    d_vlen(vlen), d_scale(scale)
 {
- const int alignment_multiple =
-   volk_get_alignment() / sizeof(short);
- set_alignment(alignment_multiple);
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(short);
+  set_alignment(alignment_multiple);
+}
+
+float 
+gr_float_to_short::scale() const
+{
+  return d_scale;
+}
+
+void
+gr_float_to_short::set_scale(float scale)
+{
+  d_scale = scale;
 }
 
 int
@@ -53,18 +66,12 @@ gr_float_to_short::work (int noutput_items,
   const float *in = (const float *) input_items[0];
   short *out = (short *) output_items[0];
 
-#if 1
-  float d_scale = 1.0;
-  //volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
   if(is_unaligned()) {
-    volk_32f_s32f_convert_16i_u(out, in, d_scale, noutput_items);
+    volk_32f_s32f_convert_16i_u(out, in, d_scale, d_vlen*noutput_items);
   }
   else {
-    volk_32f_s32f_convert_16i_a(out, in, d_scale, noutput_items);
+    volk_32f_s32f_convert_16i_a(out, in, d_scale, d_vlen*noutput_items);
   }
-#else
-  gri_float_to_short (in, out, noutput_items);
-#endif
 
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.h b/gnuradio-core/src/lib/general/gr_float_to_short.h
index 010d61141..93e441f41 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.h
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.h
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,7 +30,7 @@ class gr_float_to_short;
 typedef boost::shared_ptr<gr_float_to_short> gr_float_to_short_sptr;
 
 GR_CORE_API gr_float_to_short_sptr
-gr_make_float_to_short ();
+gr_make_float_to_short (size_t vlen=1, float scale=1);
 
 /*!
  * \brief Convert stream of float to a stream of short
@@ -39,10 +39,17 @@ gr_make_float_to_short ();
 
 class GR_CORE_API gr_float_to_short : public gr_sync_block
 {
-  friend GR_CORE_API gr_float_to_short_sptr gr_make_float_to_short ();
-  gr_float_to_short ();
+  friend GR_CORE_API
+    gr_float_to_short_sptr gr_make_float_to_short (size_t vlen, float scale);
+  gr_float_to_short (size_t vlen, float scale);
+
+  size_t d_vlen;
+  float d_scale;
 
  public:
+  float scale() const;
+  void set_scale(float scale);
+
   virtual int work (int noutput_items,
 		    gr_vector_const_void_star &input_items,
 		    gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.i b/gnuradio-core/src/lib/general/gr_float_to_short.i
index ad059c453..8da733054 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.i
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -22,9 +22,13 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,float_to_short)
 
-gr_float_to_short_sptr gr_make_float_to_short ();
+gr_float_to_short_sptr
+gr_make_float_to_short (size_t vlen=1, float scale=1);
 
 class gr_float_to_short : public gr_sync_block
 {
-  gr_float_to_short ();
+public:
+  float scale() const;
+  void set_scale(float scale);
+  gr_float_to_short (size_t vlen, float scale);
 };
-- 
cgit 


From 9009ceb57a9eef4623216aa945cb0c4173c0ee2e Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 15:08:26 -0500
Subject: core: minor tweaks to float_to_X

---
 gnuradio-core/src/lib/general/gr_float_to_char.cc  | 1 -
 gnuradio-core/src/lib/general/gr_float_to_char.i   | 1 -
 gnuradio-core/src/lib/general/gr_float_to_int.cc   | 1 -
 gnuradio-core/src/lib/general/gr_float_to_int.i    | 1 -
 gnuradio-core/src/lib/general/gr_float_to_short.cc | 1 -
 gnuradio-core/src/lib/general/gr_float_to_short.i  | 1 -
 6 files changed, 6 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc
index 165172ac6..14635ff71 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc
@@ -26,7 +26,6 @@
 
 #include <gr_float_to_char.h>
 #include <gr_io_signature.h>
-#include <gri_float_to_char.h>
 #include <volk/volk.h>
 
 gr_float_to_char_sptr
diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.i b/gnuradio-core/src/lib/general/gr_float_to_char.i
index 05939f1b1..a1c88750f 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.i
@@ -30,5 +30,4 @@ class gr_float_to_char : public gr_sync_block
 public:
   float scale() const;
   void set_scale(float scale);
-  gr_float_to_char (size_t vlen, float scale);
 };
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc
index c4d9991af..28214538f 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc
@@ -26,7 +26,6 @@
 
 #include <gr_float_to_int.h>
 #include <gr_io_signature.h>
-#include <gri_float_to_int.h>
 #include <volk/volk.h>
 
 gr_float_to_int_sptr
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.i b/gnuradio-core/src/lib/general/gr_float_to_int.i
index a0b52a3c1..6e71f54a9 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.i
@@ -30,5 +30,4 @@ class gr_float_to_int : public gr_sync_block
 public:
   float scale() const;
   void set_scale(float scale);
-  gr_float_to_int (size_t vlen, float scale);
 };
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc
index e6ac5c184..188bfdae3 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc
@@ -26,7 +26,6 @@
 
 #include <gr_float_to_short.h>
 #include <gr_io_signature.h>
-#include <gri_float_to_short.h>
 #include <volk/volk.h>
 
 gr_float_to_short_sptr
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.i b/gnuradio-core/src/lib/general/gr_float_to_short.i
index 8da733054..072da5213 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.i
@@ -30,5 +30,4 @@ class gr_float_to_short : public gr_sync_block
 public:
   float scale() const;
   void set_scale(float scale);
-  gr_float_to_short (size_t vlen, float scale);
 };
-- 
cgit 


From c05a75d0e1d28fe2c229a9a93ef42828929999b4 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 15:09:44 -0500
Subject: core: short_to_X: adding short_to_char block using Volk; made Volk
 version of short_to_float.

---
 gnuradio-core/src/lib/general/gr_short_to_char.cc  | 67 ++++++++++++++++++++++
 gnuradio-core/src/lib/general/gr_short_to_char.h   | 56 ++++++++++++++++++
 gnuradio-core/src/lib/general/gr_short_to_char.i   | 30 ++++++++++
 gnuradio-core/src/lib/general/gr_short_to_float.cc | 41 ++++++++++---
 gnuradio-core/src/lib/general/gr_short_to_float.h  | 18 ++++--
 gnuradio-core/src/lib/general/gr_short_to_float.i  |  9 ++-
 6 files changed, 206 insertions(+), 15 deletions(-)
 create mode 100644 gnuradio-core/src/lib/general/gr_short_to_char.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_short_to_char.h
 create mode 100644 gnuradio-core/src/lib/general/gr_short_to_char.i

diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.cc b/gnuradio-core/src/lib/general/gr_short_to_char.cc
new file mode 100644
index 000000000..a3c096e6d
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.cc
@@ -0,0 +1,67 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_short_to_char.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_short_to_char_sptr
+gr_make_short_to_char (size_t vlen)
+{
+  return gnuradio::get_initial_sptr(new gr_short_to_char (vlen));
+}
+
+gr_short_to_char::gr_short_to_char (size_t vlen)
+  : gr_sync_block ("gr_short_to_char",
+		   gr_make_io_signature (1, 1, sizeof (short)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (char)*vlen)),
+    d_vlen(vlen)
+{
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(char);
+  set_alignment(alignment_multiple);
+}
+
+int
+gr_short_to_char::work (int noutput_items,
+			gr_vector_const_void_star &input_items,
+			gr_vector_void_star &output_items)
+{
+  const int16_t *in = (const int16_t *) input_items[0];
+  int8_t *out = (int8_t *) output_items[0];
+
+  if(is_unaligned()) {
+    volk_16i_convert_8i_u(out, in, d_vlen*noutput_items);
+  }
+  else {
+    volk_16i_convert_8i_a(out, in, d_vlen*noutput_items);
+  }
+  
+  return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.h b/gnuradio-core/src/lib/general/gr_short_to_char.h
new file mode 100644
index 000000000..9682d86ec
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_SHORT_TO_CHAR_H
+#define INCLUDED_GR_SHORT_TO_CHAR_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_short_to_char;
+typedef boost::shared_ptr<gr_short_to_char> gr_short_to_char_sptr;
+
+GR_CORE_API gr_short_to_char_sptr
+gr_make_short_to_char (size_t vlen=1);
+
+/*!
+ * \brief Convert stream of short to a stream of char
+ * \ingroup converter_blk
+ */
+
+class GR_CORE_API gr_short_to_char : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_short_to_char_sptr
+    gr_make_short_to_char (size_t vlen);
+  gr_short_to_char (size_t vlen);
+
+  size_t d_vlen;
+
+ public:
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_SHORT_TO_CHAR_H */
diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.i b/gnuradio-core/src/lib/general/gr_short_to_char.i
new file mode 100644
index 000000000..8fa453a06
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.i
@@ -0,0 +1,30 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,short_to_char)
+
+gr_short_to_char_sptr gr_make_short_to_char (size_t vlen=1);
+
+class gr_short_to_char : public gr_sync_block
+{
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc
index 7b80953ac..d11618414 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2010,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -27,18 +27,35 @@
 #include <gr_short_to_float.h>
 #include <gr_io_signature.h>
 #include <gri_short_to_float.h>
+#include <volk/volk.h>
 
 gr_short_to_float_sptr
-gr_make_short_to_float ()
+gr_make_short_to_float (size_t vlen, float scale)
 {
-  return gnuradio::get_initial_sptr(new gr_short_to_float ());
+  return gnuradio::get_initial_sptr(new gr_short_to_float (vlen, scale));
 }
 
-gr_short_to_float::gr_short_to_float ()
+gr_short_to_float::gr_short_to_float (size_t vlen, float scale)
   : gr_sync_block ("gr_short_to_float",
-		   gr_make_io_signature (1, 1, sizeof (short)),
-		   gr_make_io_signature (1, 1, sizeof (float)))
+		   gr_make_io_signature (1, 1, sizeof (short)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+    d_vlen(vlen), d_scale(scale)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
+}
+
+float 
+gr_short_to_float::scale() const
+{
+  return d_scale;
+}
+
+void
+gr_short_to_float::set_scale(float scale)
+{
+  d_scale = scale;
 }
 
 int
@@ -49,7 +66,17 @@ gr_short_to_float::work (int noutput_items,
   const short *in = (const short *) input_items[0];
   float *out = (float *) output_items[0];
 
-  gri_short_to_float (in, out, noutput_items);
+#if 0
+  if(is_unaligned()) {
+    volk_16i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
+  }
+  else {
+    float d_scale = 1.0;
+    volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
+  }
+#else
+  gri_short_to_float (in, out, d_vlen*noutput_items);
+#endif
   
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.h b/gnuradio-core/src/lib/general/gr_short_to_float.h
index b40c966ea..efdc81ecd 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.h
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.h
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,7 +30,7 @@ class gr_short_to_float;
 typedef boost::shared_ptr<gr_short_to_float> gr_short_to_float_sptr;
 
 GR_CORE_API gr_short_to_float_sptr
-gr_make_short_to_float ();
+gr_make_short_to_float (size_t vlen=1, float scale=1);
 
 /*!
  * \brief Convert stream of short to a stream of float
@@ -39,10 +39,18 @@ gr_make_short_to_float ();
 
 class GR_CORE_API gr_short_to_float : public gr_sync_block
 {
-  friend GR_CORE_API gr_short_to_float_sptr gr_make_short_to_float ();
-  gr_short_to_float ();
-
+ private:
+  friend GR_CORE_API gr_short_to_float_sptr
+    gr_make_short_to_float (size_t vlen, float scale);
+  gr_short_to_float (size_t vlen, float scale);
+
+  size_t d_vlen;
+  float d_scale;
+  
  public:
+  float scale() const;
+  void set_scale(float scale);
+  
   virtual int work (int noutput_items,
 		    gr_vector_const_void_star &input_items,
 		    gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.i b/gnuradio-core/src/lib/general/gr_short_to_float.i
index 56759df29..229618890 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.i
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.i
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -22,9 +22,12 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,short_to_float)
 
-gr_short_to_float_sptr gr_make_short_to_float ();
+gr_short_to_float_sptr
+gr_make_short_to_float (size_t vlen=1, float scale=1);
 
 class gr_short_to_float : public gr_sync_block
 {
-  gr_short_to_float ();
+public:
+  float scale() const;
+  void set_scale(float scale);
 };
-- 
cgit 


From 7943c197eccdb16e6c42af5544b9cb34a089b360 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 15:11:17 -0500
Subject: core: cleanup short_to_float.

---
 gnuradio-core/src/lib/general/gr_short_to_float.cc | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc
index d11618414..9f71f52ed 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc
@@ -26,7 +26,6 @@
 
 #include <gr_short_to_float.h>
 #include <gr_io_signature.h>
-#include <gri_short_to_float.h>
 #include <volk/volk.h>
 
 gr_short_to_float_sptr
@@ -66,17 +65,12 @@ gr_short_to_float::work (int noutput_items,
   const short *in = (const short *) input_items[0];
   float *out = (float *) output_items[0];
 
-#if 0
   if(is_unaligned()) {
     volk_16i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
   }
   else {
-    float d_scale = 1.0;
     volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
   }
-#else
-  gri_short_to_float (in, out, d_vlen*noutput_items);
-#endif
   
   return noutput_items;
 }
-- 
cgit 


From c9a420645f16f5c28fdcf05c1c09b9a91b95e640 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 15:11:50 -0500
Subject: core: QA codes for float_to_X and short_to_X (note: float_to_int
 needs work).

---
 .../src/python/gnuradio/gr/qa_float_to_char.py     | 20 ++++++-
 .../src/python/gnuradio/gr/qa_float_to_int.py      | 22 ++++++-
 .../src/python/gnuradio/gr/qa_float_to_short.py    | 20 ++++++-
 .../src/python/gnuradio/gr/qa_short_to_char.py     | 69 +++++++++++++++++++++
 .../src/python/gnuradio/gr/qa_short_to_float.py    | 70 ++++++++++++++++++++++
 5 files changed, 197 insertions(+), 4 deletions(-)
 create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py
 create mode 100755 gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py

diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
index 45df71d0a..ecdd36228 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Copyright 2011 Free Software Foundation, Inc.
+# Copyright 2011,2012 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -59,6 +59,24 @@ class test_float_to_char (gr_unittest.TestCase):
 
         self.assertEqual(expected_result, result_data)
 
+    def test_003(self):
+        
+        scale = 2
+        vlen = 3
+        src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+        expected_result = [0, 2, 4, 6, 8, 11, 254, 252, 250]
+        src = gr.vector_source_f(src_data)
+        s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
+        op = gr.float_to_char(vlen, scale)
+        v2s = gr.vector_to_stream(gr.sizeof_char, vlen)
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, s2v, op, v2s, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
 if __name__ == '__main__':
     gr_unittest.run(test_float_to_char, "test_float_to_char.xml")
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
index 4cc3d0056..559f90f05 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
@@ -33,7 +33,6 @@ class test_float_to_int (gr_unittest.TestCase):
     def test_001(self):
 
         src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
-        expected_result = [int(round(s)) for s in src_data]
         
         ### Volk results
         expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5]
@@ -53,7 +52,7 @@ class test_float_to_int (gr_unittest.TestCase):
         src_data = (  2146400000,   2147483647,
                      -2146400000,  -2147483648 )
         expected_result = [  2146400000,   2146400000,
-                            -2146400000,  -2146400000 ]
+                            -2146400000,  -2147483648 ]
         src = gr.vector_source_f(src_data)
         op = gr.float_to_int()
         dst = gr.vector_sink_i()
@@ -64,6 +63,25 @@ class test_float_to_int (gr_unittest.TestCase):
 
         self.assertEqual(expected_result, result_data)
 
+
+    def test_003(self):
+        
+        scale = 2
+        vlen = 3
+        src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+        expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6,]
+        src = gr.vector_source_f(src_data)
+        s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
+        op = gr.float_to_int(vlen, scale)
+        v2s = gr.vector_to_stream(gr.sizeof_int, vlen)
+        dst = gr.vector_sink_i()
+
+        self.tb.connect(src, s2v, op, v2s, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
 if __name__ == '__main__':
     gr_unittest.run(test_float_to_int, "test_float_to_int.xml")
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
index aa26668c8..926f1c08b 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Copyright 2011 Free Software Foundation, Inc.
+# Copyright 2011,2012 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -66,6 +66,24 @@ class test_float_to_short (gr_unittest.TestCase):
 
         self.assertEqual(expected_result, result_data)
 
+    def test_003(self):
+
+        scale = 2
+        vlen = 3
+        src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+        expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6]
+        src = gr.vector_source_f(src_data)
+        s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
+        op = gr.float_to_short(vlen, scale)
+        v2s = gr.vector_to_stream(gr.sizeof_short, vlen)
+        dst = gr.vector_sink_s()
+
+        self.tb.connect(src, s2v, op, v2s, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
 if __name__ == '__main__':
     gr_unittest.run(test_float_to_short, "test_float_to_short.xml")
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py
new file mode 100755
index 000000000..6a95fa01d
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+#
+# Copyright 2011,2012 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_short_to_char (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_001(self):
+
+        src_data = range(0, 32767, 32767/127)
+        src_data = [int(s) for s in src_data]
+        expected_result = range(0, 128)
+        src = gr.vector_source_s(src_data)
+        op = gr.short_to_char()
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+    def test_002(self):
+
+        vlen = 3
+        src_data = range(0, 32400, 32767/127)
+        src_data = [int(s) for s in src_data]
+        expected_result = range(0, 126)
+        src = gr.vector_source_s(src_data)
+        s2v = gr.stream_to_vector(gr.sizeof_short, vlen)
+        op = gr.short_to_char(vlen)
+        v2s = gr.vector_to_stream(gr.sizeof_char, vlen)
+        dst = gr.vector_sink_b()
+
+        self.tb.connect(src, s2v, op, v2s, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_short_to_char, "test_short_to_char.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py
new file mode 100755
index 000000000..8f331b495
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+#
+# Copyright 2011,2012 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_short_to_float (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_001(self):
+
+        src_data = (0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5)
+        expected_result = [ 0.0,  1.0,  2.0,  3.0,  4.0, 5.0,
+                           -1.0, -2.0, -3.0, -4.0, -5.0]
+
+        src = gr.vector_source_s(src_data)
+        op = gr.short_to_float()
+        dst = gr.vector_sink_f()
+
+        self.tb.connect(src, op, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+    def test_002(self):
+
+        vlen = 3
+        src_data = (0, 1, 2, 3, 4, 5, -1, -2, -3)
+        expected_result = [0.0, 1.0, 2.0, 3.0, 4.0,
+                           5.0, -1.0, -2.0, -3.0]
+        src = gr.vector_source_s(src_data)
+        s2v = gr.stream_to_vector(gr.sizeof_short, vlen)
+        op = gr.short_to_float(vlen)
+        v2s = gr.vector_to_stream(gr.sizeof_float, vlen)
+        dst = gr.vector_sink_f()
+
+        self.tb.connect(src, s2v, op, v2s, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_short_to_float, "test_short_to_float.xml")
+
-- 
cgit 


From f63927a46517ea9c7e914feb9177896219b33a0d Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 19:25:04 -0500
Subject: core: more conversion work to Volk for type converters.

---
 gnuradio-core/src/lib/general/gr_char_to_float.cc  | 39 ++++++++++++++++------
 gnuradio-core/src/lib/general/gr_char_to_float.h   | 14 ++++++--
 gnuradio-core/src/lib/general/gr_char_to_float.i   |  7 ++--
 gnuradio-core/src/lib/general/gr_int_to_float.cc   | 30 ++++++++++++-----
 gnuradio-core/src/lib/general/gr_int_to_float.h    | 16 ++++++---
 gnuradio-core/src/lib/general/gr_int_to_float.i    |  8 +++--
 gnuradio-core/src/lib/general/gr_short_to_char.i   |  3 +-
 gnuradio-core/src/lib/general/gr_short_to_float.cc |  1 -
 .../src/python/gnuradio/gr/qa_int_to_float.py      | 20 +++++++++++
 9 files changed, 105 insertions(+), 33 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.cc b/gnuradio-core/src/lib/general/gr_char_to_float.cc
index e68f8d208..ffe8ee4a1 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_char_to_float.cc
@@ -26,30 +26,47 @@
 
 #include <gr_char_to_float.h>
 #include <gr_io_signature.h>
-#include <gri_char_to_float.h>
+#include <volk/volk.h>
 
 gr_char_to_float_sptr
-gr_make_char_to_float ()
+gr_make_char_to_float (size_t vlen, float scale)
 {
-  return gnuradio::get_initial_sptr(new gr_char_to_float ());
+  return gnuradio::get_initial_sptr(new gr_char_to_float (vlen, scale));
 }
 
-gr_char_to_float::gr_char_to_float ()
+gr_char_to_float::gr_char_to_float (size_t vlen, float scale)
   : gr_sync_block ("gr_char_to_float",
-		   gr_make_io_signature (1, 1, sizeof (char)),
-		   gr_make_io_signature (1, 1, sizeof (float)))
+		   gr_make_io_signature (1, 1, sizeof (char)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+    d_vlen(vlen), d_scale(scale)    
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
+}
+
+float 
+gr_char_to_float::scale() const
+{
+  return d_scale;
+}
+
+void
+gr_char_to_float::set_scale(float scale)
+{
+  d_scale = scale;
 }
 
 int
 gr_char_to_float::work (int noutput_items,
-			 gr_vector_const_void_star &input_items,
-			 gr_vector_void_star &output_items)
+			gr_vector_const_void_star &input_items,
+			gr_vector_void_star &output_items)
 {
-  const char *in = (const char *) input_items[0];
+  const int8_t *in = (const int8_t *) input_items[0];
   float *out = (float *) output_items[0];
 
-  gri_char_to_float (in, out, noutput_items);
-  
+  // Note: the unaligned benchmarked much faster than the aligned
+  volk_8i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
+
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.h b/gnuradio-core/src/lib/general/gr_char_to_float.h
index b20d2066f..4ad8e59a8 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_float.h
+++ b/gnuradio-core/src/lib/general/gr_char_to_float.h
@@ -30,7 +30,7 @@ class gr_char_to_float;
 typedef boost::shared_ptr<gr_char_to_float> gr_char_to_float_sptr;
 
 GR_CORE_API gr_char_to_float_sptr
-gr_make_char_to_float ();
+gr_make_char_to_float (size_t vlen=1, float scale=1);
 
 /*!
  * \brief Convert stream of chars to a stream of float
@@ -39,10 +39,18 @@ gr_make_char_to_float ();
 
 class GR_CORE_API gr_char_to_float : public gr_sync_block
 {
-  friend GR_CORE_API gr_char_to_float_sptr gr_make_char_to_float ();
-  gr_char_to_float ();
+ private:
+  friend GR_CORE_API gr_char_to_float_sptr
+    gr_make_char_to_float (size_t vlen, float scale);
+  gr_char_to_float (size_t vlen, float scale);
+
+  size_t d_vlen;
+  float d_scale;
 
  public:
+  float scale() const;
+  void set_scale(float scale);
+
   virtual int work (int noutput_items,
 		    gr_vector_const_void_star &input_items,
 		    gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.i b/gnuradio-core/src/lib/general/gr_char_to_float.i
index 0403b621d..65ad861f2 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_float.i
+++ b/gnuradio-core/src/lib/general/gr_char_to_float.i
@@ -22,9 +22,12 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,char_to_float)
 
-gr_char_to_float_sptr gr_make_char_to_float ();
+gr_char_to_float_sptr
+gr_make_char_to_float (size_t vlen=1, float scale=1);
 
 class gr_char_to_float : public gr_sync_block
 {
-  gr_char_to_float ();
+public:
+  float scale() const;
+  void set_scale(float scale);
 };
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.cc b/gnuradio-core/src/lib/general/gr_int_to_float.cc
index 29ca22add..dca0e1b89 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -27,18 +27,23 @@
 #include <gr_int_to_float.h>
 #include <gr_io_signature.h>
 #include <gri_int_to_float.h>
+#include <volk/volk.h>
 
 gr_int_to_float_sptr
-gr_make_int_to_float ()
+gr_make_int_to_float (size_t vlen, float scale)
 {
-  return gnuradio::get_initial_sptr(new gr_int_to_float ());
+  return gnuradio::get_initial_sptr(new gr_int_to_float (vlen, scale));
 }
 
-gr_int_to_float::gr_int_to_float ()
+gr_int_to_float::gr_int_to_float (size_t vlen, float scale)
   : gr_sync_block ("gr_int_to_float",
-		   gr_make_io_signature (1, 1, sizeof (int32_t)),
-		   gr_make_io_signature (1, 1, sizeof (float)))
+		   gr_make_io_signature (1, 1, sizeof (int32_t)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+    d_vlen(vlen), d_scale(scale)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -48,8 +53,17 @@ gr_int_to_float::work (int noutput_items,
 {
   const int32_t *in = (const int32_t *) input_items[0];
   float *out = (float *) output_items[0];
-
-  gri_int_to_float(in, out, noutput_items);
+  
+#if 1
+  if(is_unaligned()) {
+    volk_32i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
+  }
+  else {
+    volk_32i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
+  }
+#else
+  gri_int_to_float(in, out, d_vlen*noutput_items);
+#endif
   
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.h b/gnuradio-core/src/lib/general/gr_int_to_float.h
index 9af381ba9..af6488a50 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.h
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.h
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -30,7 +30,7 @@ class gr_int_to_float;
 typedef boost::shared_ptr<gr_int_to_float> gr_int_to_float_sptr;
 
 GR_CORE_API gr_int_to_float_sptr
-gr_make_int_to_float ();
+gr_make_int_to_float (size_t vlen=1, float scale=1);
 
 /*!
  * \brief Convert stream of int to a stream of float
@@ -39,10 +39,18 @@ gr_make_int_to_float ();
 
 class GR_CORE_API gr_int_to_float : public gr_sync_block
 {
-  friend GR_CORE_API gr_int_to_float_sptr gr_make_int_to_float ();
-  gr_int_to_float ();
+ private:
+  friend GR_CORE_API gr_int_to_float_sptr
+    gr_make_int_to_float (size_t vlen, float scale);
+  gr_int_to_float (size_t vlen, float scale);
+
+  size_t d_vlen;
+  float d_scale;  
 
  public:
+  float scale() const;           // NOTE(review): the gr_int_to_float.cc hunks in this patch add no definition for scale()/set_scale() -- confirm they are defined elsewhere
+  void set_scale(float scale);   // stores a new scale factor (see NOTE above about the missing definition)
+
   virtual int work (int noutput_items,
 		    gr_vector_const_void_star &input_items,
 		    gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.i b/gnuradio-core/src/lib/general/gr_int_to_float.i
index 8cb9e35b5..c1f25e37b 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.i
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.i
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -22,9 +22,11 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,int_to_float)
 
-gr_int_to_float_sptr gr_make_int_to_float ();
+gr_int_to_float_sptr
+gr_make_int_to_float (size_t vlen=1, float scale=1);
 
 class gr_int_to_float : public gr_sync_block
 {
-  gr_int_to_float ();
+  float scale() const;           // NOTE(review): no `public:` label here, unlike gr_char_to_float.i -- presumably SWIG will treat these as private and not wrap them; confirm intent
+  void set_scale(float scale);
 };
diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.i b/gnuradio-core/src/lib/general/gr_short_to_char.i
index 8fa453a06..330a4fdda 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_char.i
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.i
@@ -22,7 +22,8 @@
 
 GR_SWIG_BLOCK_MAGIC(gr,short_to_char)
 
-gr_short_to_char_sptr gr_make_short_to_char (size_t vlen=1);
+gr_short_to_char_sptr
+gr_make_short_to_char (size_t vlen=1);
 
 class gr_short_to_char : public gr_sync_block
 {
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc
index 9f71f52ed..94d376a27 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc
@@ -71,7 +71,6 @@ gr_short_to_float::work (int noutput_items,
   else {
     volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
   }
-  
   return noutput_items;
 }
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py b/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py
index edfc26409..530b2a5cc 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py
@@ -44,6 +44,26 @@ class test_int_to_float (gr_unittest.TestCase):
         
         self.assertFloatTuplesAlmostEqual(expected_result, result_data)
 
+    def test_002(self):
+        # Vector case (vlen=3) with values around +/-65000; the float output must equal the int input exactly.
+        vlen = 3
+        src_data = ( 65000, 65001, 65002, 65003, 65004, 65005,
+                    -65001, -65002, -65003)
+        expected_result = [ 65000.0,  65001.0,  65002.0, 
+                            65003.0,  65004.0,  65005.0,
+                           -65001.0, -65002.0, -65003.0]
+        src = gr.vector_source_i(src_data)
+        s2v = gr.stream_to_vector(gr.sizeof_int, vlen)
+        op = gr.int_to_float(vlen)
+        v2s = gr.vector_to_stream(gr.sizeof_float, vlen)
+        dst = gr.vector_sink_f()
+
+        self.tb.connect(src, s2v, op, v2s, dst)
+        self.tb.run()
+        result_data = list(dst.data())
+
+        self.assertEqual(expected_result, result_data)
+
 if __name__ == '__main__':
     gr_unittest.run(test_int_to_float, "test_int_to_float.xml")
 
-- 
cgit 


From f6075ca94945510eddc5581b552f5e61ce1d0c46 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 19:25:41 -0500
Subject: core: more type converters in Volk; adding char_to_short converter.

---
 gnuradio-core/src/lib/general/CMakeLists.txt      |  3 ++
 gnuradio-core/src/lib/general/Makefile.am         |  2 +
 gnuradio-core/src/lib/general/general.i           |  6 +++
 gnuradio-core/src/lib/general/gr_char_to_short.cc | 64 +++++++++++++++++++++++
 gnuradio-core/src/lib/general/gr_char_to_short.h  | 55 +++++++++++++++++++
 gnuradio-core/src/lib/general/gr_char_to_short.i  | 30 +++++++++++
 6 files changed, 160 insertions(+)
 create mode 100644 gnuradio-core/src/lib/general/gr_char_to_short.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_char_to_short.h
 create mode 100644 gnuradio-core/src/lib/general/gr_char_to_short.i

diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 6ecaa930a..6635abc8d 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -187,6 +187,7 @@ set(gr_core_general_triple_threats
     gr_bin_statistics_f
     gr_bytes_to_syms
     gr_char_to_float
+    gr_char_to_short
     gr_check_counting_s
     gr_check_lfsr_32k_s
     gr_complex_to_interleaved_short
@@ -229,6 +230,7 @@ set(gr_core_general_triple_threats
     gr_kludge_copy
     gr_lfsr_32k_source_s
     gr_map_bb
+    gr_multiply_cc
     gr_nlog10_ff
     gr_nop
     gr_null_sink
@@ -256,6 +258,7 @@ set(gr_core_general_triple_threats
     gr_rms_ff
     gr_repeat
     gr_short_to_float
+    gr_short_to_char
     gr_simple_correlator
     gr_simple_framer
     gr_simple_squelch_cc
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index 0122932cf..1b802b09c 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -46,6 +46,7 @@ libgeneral_la_SOURCES = 		\
 	gr_bin_statistics_f.cc		\
 	gr_bytes_to_syms.cc		\
 	gr_char_to_float.cc		\
+	gr_char_to_short.cc		\
 	gr_check_counting_s.cc		\
 	gr_check_lfsr_32k_s.cc		\
 	gr_circular_file.cc		\
@@ -123,6 +124,7 @@ libgeneral_la_SOURCES = 		\
 	gr_rms_cf.cc			\
 	gr_rms_ff.cc			\
 	gr_short_to_float.cc		\
+	gr_short_to_char.cc		\
 	gr_int_to_float.cc		\
 	gr_simple_correlator.cc		\
 	gr_simple_framer.cc		\
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index 5a701bf80..ec90e40e5 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -44,8 +44,10 @@
 #include <gr_float_to_char.h>
 #include <gr_float_to_uchar.h>
 #include <gr_short_to_float.h>
+#include <gr_short_to_char.h>
 #include <gr_int_to_float.h>
 #include <gr_char_to_float.h>
+#include <gr_char_to_short.h>
 #include <gr_uchar_to_float.h>
 #include <gr_frequency_modulator_fc.h>
 #include <gr_phase_modulator_fc.h>
@@ -104,6 +106,7 @@
 #include <gr_diff_decoder_bb.h>
 #include <gr_framer_sink_1.h>
 #include <gr_map_bb.h>
+#include <gr_multiply_cc.h>
 #include <gr_feval.h>
 #include <gr_pwr_squelch_cc.h>
 #include <gr_pwr_squelch_ff.h>
@@ -158,8 +161,10 @@
 %include "gr_float_to_char.i"
 %include "gr_float_to_uchar.i"
 %include "gr_short_to_float.i"
+%include "gr_short_to_char.i"
 %include "gr_int_to_float.i"
 %include "gr_char_to_float.i"
+%include "gr_char_to_short.i"
 %include "gr_uchar_to_float.i"
 %include "gr_frequency_modulator_fc.i"
 %include "gr_phase_modulator_fc.i"
@@ -218,6 +223,7 @@
 %include "gr_diff_decoder_bb.i"
 %include "gr_framer_sink_1.i"
 %include "gr_map_bb.i"
+%include "gr_multiply_cc.i"
 %include "gr_feval.i"
 %include "gr_pwr_squelch_cc.i"
 %include "gr_pwr_squelch_ff.i"
diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc
new file mode 100644
index 000000000..40ffa9338
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_char_to_short.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_char_to_short_sptr
+gr_make_char_to_short (size_t vlen)
+{
+  return gnuradio::get_initial_sptr(new gr_char_to_short (vlen));
+}
+
+// Sync block whose input items are vlen chars and whose output items are vlen shorts.
+gr_char_to_short::gr_char_to_short (size_t vlen)
+  : gr_sync_block ("gr_char_to_short",
+		   gr_make_io_signature (1, 1, sizeof (char)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (short)*vlen)), d_vlen(vlen)
+{
+  // volk_get_alignment() is divided by the output scalar size: shorts here, not floats.
+  const int alignment_multiple = volk_get_alignment() / sizeof(short);
+  set_alignment(alignment_multiple);
+}
+
+int
+gr_char_to_short::work (int noutput_items,
+			gr_vector_const_void_star &input_items,
+			gr_vector_void_star &output_items)
+{
+  const int8_t *in = (const int8_t *) input_items[0];
+  int16_t *out = (int16_t *) output_items[0];
+  // Convert d_vlen*noutput_items scalars: the _u kernel when is_unaligned() is set, the _a kernel otherwise.
+  if(is_unaligned()) {
+    volk_8i_convert_16i_u(out, in, d_vlen*noutput_items);
+  }
+  else {
+    volk_8i_convert_16i_a(out, in, d_vlen*noutput_items);
+  }
+
+  return noutput_items;
+}
diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.h b/gnuradio-core/src/lib/general/gr_char_to_short.h
new file mode 100644
index 000000000..8ed974acf
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.h
@@ -0,0 +1,55 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_CHAR_TO_SHORT_H
+#define INCLUDED_GR_CHAR_TO_SHORT_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_char_to_short;
+typedef boost::shared_ptr<gr_char_to_short> gr_char_to_short_sptr;
+
+GR_CORE_API gr_char_to_short_sptr
+gr_make_char_to_short (size_t vlen=1);
+
+/*!
+ * \brief Convert stream of chars to a stream of shorts
+ * \ingroup converter_blk
+ */
+
+class GR_CORE_API gr_char_to_short : public gr_sync_block
+{
+  friend GR_CORE_API gr_char_to_short_sptr
+    gr_make_char_to_short (size_t vlen);
+  gr_char_to_short (size_t vlen);  // private: instances come from the friend factory gr_make_char_to_short()
+
+  size_t d_vlen;  // number of scalars per stream item
+  
+ public:
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_CHAR_TO_SHORT_H */
diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.i b/gnuradio-core/src/lib/general/gr_char_to_short.i
new file mode 100644
index 000000000..48ddbf26b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.i
@@ -0,0 +1,30 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,char_to_short)
+
+gr_char_to_short_sptr gr_make_char_to_short (size_t vlen=1);
+
+class gr_char_to_short : public gr_sync_block
+{
+
+};
-- 
cgit 


From 7ecd13dbeebb5083d309b2a74d158bb1bb26066e Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 19:29:45 -0500
Subject: core: update Makefile.am for new type converters.

---
 gnuradio-core/src/lib/general/CMakeLists.txt | 2 +-
 gnuradio-core/src/lib/general/Makefile.am    | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 6635abc8d..6dc9d411d 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2010-2011 Free Software Foundation, Inc.
+# Copyright 2010-2012 Free Software Foundation, Inc.
 #
 # This file is part of GNU Radio
 #
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index 1b802b09c..65b5a729e 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -1,5 +1,5 @@
 #
-# Copyright 2001,2002,2004,2006,2007,2008,2009 Free Software Foundation, Inc.
+# Copyright 2001,2002,2004,2006-2012 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -197,6 +197,7 @@ grinclude_HEADERS = 			\
 	gr_bin_statistics_f.h		\
 	gr_bytes_to_syms.h		\
 	gr_char_to_float.h		\
+	gr_char_to_short.h		\
 	gr_check_counting_s.h		\
 	gr_check_lfsr_32k_s.h		\
 	gr_circular_file.h		\
@@ -278,6 +279,7 @@ grinclude_HEADERS = 			\
 	gr_reverse.h			\
 	gr_rms_cf.h			\
 	gr_rms_ff.h			\
+	gr_short_to_char.h		\
 	gr_short_to_float.h		\
 	gr_int_to_float.h		\
 	gr_simple_correlator.h		\
@@ -325,7 +327,7 @@ grinclude_HEADERS = 			\
 	gri_int_to_float.h		\
 	gri_lfsr_15_1_0.h		\
 	gri_lfsr_32k.h			\
-	gri_short_to_float.h		\
+	gri_short_to_char.h		\
 	gri_uchar_to_float.h		\
 	malloc16.h			\
 	random.h			\
@@ -363,6 +365,7 @@ swiginclude_HEADERS =			\
 	gr_bin_statistics_f.i		\
 	gr_bytes_to_syms.i		\
 	gr_char_to_float.i		\
+	gr_char_to_short.i		\
 	gr_check_counting_s.i		\
 	gr_check_lfsr_32k_s.i		\
 	gr_complex_to_interleaved_short.i \
@@ -432,6 +435,7 @@ swiginclude_HEADERS =			\
 	gr_rms_cf.i			\
 	gr_rms_ff.i			\
 	gr_repeat.i			\
+	gr_short_to_char.i		\
 	gr_short_to_float.i		\
 	gr_simple_correlator.i		\
 	gr_simple_framer.i		\
-- 
cgit 


From d870487437b33d1a26eb8f5f5aaa414ca6ed24e0 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 28 Jan 2012 19:31:33 -0500
Subject: volk: fix lower bound of int conversion.

---
 volk/include/volk/volk_32f_s32f_convert_32i_a.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
index 15fa282fb..8f2fc791e 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
@@ -22,7 +22,7 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
-  float min_val = -2147483647;
+  float min_val = -2147483648;
   float max_val = 2147483647;
   float r;
 
@@ -71,7 +71,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
   const float* inputVectorPtr = (const float*)inputVector;
   int32_t* outputVectorPtr = outputVector;
 
-  float min_val = -2147483647;
+  float min_val = -2147483648;
   float max_val = 2147483647;
   float r;
 
-- 
cgit 


From 9c9b9a8aa05c7cde7cd11cd5d1e052630715d252 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 29 Jan 2012 17:31:37 -0500
Subject: volk: added unaligned volk function for magnitude of a complex
 number.

---
 volk/include/volk/volk_32fc_magnitude_32f_u.h | 118 ++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 volk/include/volk/volk_32fc_magnitude_32f_u.h

diff --git a/volk/include/volk/volk_32fc_magnitude_32f_u.h b/volk/include/volk/volk_32fc_magnitude_32f_u.h
new file mode 100644
index 000000000..ed1cedef9
--- /dev/null
+++ b/volk/include/volk/volk_32fc_magnitude_32f_u.h
@@ -0,0 +1,118 @@
+#ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
+#define INCLUDED_volk_32fc_magnitude_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Calculates the magnitude of each value in complexVector and stores the results in magnitudeVector (SSE3, unaligned loads/stores)
+    \param magnitudeVector The vector receiving the real output values, one float per complex input
+    \param complexVector The vector containing the complex input values, read as interleaved real/imaginary floats
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4;
+
+    const float* complexVectorPtr = (float*)complexVector;
+    float* magnitudeVectorPtr = magnitudeVector;
+
+    __m128 cplxValue1, cplxValue2, result;
+    for(;number < quarterPoints; number++){ // four complex values per iteration
+      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square I and Q of the first two complex values
+      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square I and Q of the last two complex values
+
+      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Pairwise add: I^2 + Q^2 for all four values
+
+      result = _mm_sqrt_ps(result);
+
+      _mm_storeu_ps(magnitudeVectorPtr, result);
+      magnitudeVectorPtr += 4;
+    }
+
+    number = quarterPoints * 4; // resume where the vector loop stopped
+    for(; number < num_points; number++){ // scalar loop for the remaining num_points % 4 values
+      float val1Real = *complexVectorPtr++;
+      float val1Imag = *complexVectorPtr++;
+      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+  /*!
+    \brief Calculates the magnitude of each value in complexVector and stores the results in magnitudeVector (SSE, unaligned loads/stores)
+    \param magnitudeVector The vector receiving the real output values, one float per complex input
+    \param complexVector The vector containing the complex input values, read as interleaved real/imaginary floats
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4;
+    
+    const float* complexVectorPtr = (float*)complexVector;
+    float* magnitudeVectorPtr = magnitudeVector;
+
+    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+    for(;number < quarterPoints; number++){ // four complex values per iteration
+      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      // Gather the four real parts: i1i2i3i4
+      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+      // Gather the four imaginary parts: q1q2q3q4
+      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+      qValue = _mm_mul_ps(qValue, qValue); // Square the Q values
+
+      result = _mm_add_ps(iValue, qValue); // I^2 + Q^2 for all four values
+
+      result = _mm_sqrt_ps(result);
+
+      _mm_storeu_ps(magnitudeVectorPtr, result);
+      magnitudeVectorPtr += 4;
+    }
+
+    number = quarterPoints * 4; // resume where the vector loop stopped
+    for(; number < num_points; number++){ // scalar loop for the remaining num_points % 4 values
+       float val1Real = *complexVectorPtr++;
+       float val1Imag = *complexVectorPtr++;
+      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Calculates the magnitude of each value in complexVector and stores the results in magnitudeVector (plain C, no SIMD intrinsics)
+    \param magnitudeVector The vector receiving the real output values, one float per complex input
+    \param complexVector The vector containing the complex input values, read as interleaved real/imaginary floats
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_32f_u_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+  const float* complexVectorPtr = (float*)complexVector;
+  float* magnitudeVectorPtr = magnitudeVector;
+  unsigned int number = 0;
+  for(number = 0; number < num_points; number++){
+    const float real = *complexVectorPtr++;
+    const float imag = *complexVectorPtr++;
+    *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag)); // |z| = sqrt(re^2 + im^2)
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
-- 
cgit 


From d142fd9e31715bae85d65c4790f278143a784142 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 29 Jan 2012 17:32:30 -0500
Subject: volk: added volk magnitude squared functions (aligned/unaligned) for
 complex numbers.

---
 volk/include/volk/Makefile.am                      |   3 +
 .../volk/volk_32fc_magnitude_squared_32f_a.h       | 114 +++++++++++++++++++++
 .../volk/volk_32fc_magnitude_squared_32f_u.h       | 114 +++++++++++++++++++++
 3 files changed, 231 insertions(+)
 create mode 100644 volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
 create mode 100644 volk/include/volk/volk_32fc_magnitude_squared_32f_u.h

diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index e7333a015..c2502f6e6 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -71,6 +71,9 @@ volkinclude_HEADERS = \
 	volk_32fc_index_max_16u_a.h \
 	volk_32fc_s32f_magnitude_16i_a.h \
 	volk_32fc_magnitude_32f_a.h \
+	volk_32fc_magnitude_32f_u.h \
+	volk_32fc_magnitude_squared_32f_a.h \
+	volk_32fc_magnitude_squared_32f_u.h \
 	volk_32fc_x2_multiply_32fc_a.h \
 	volk_32f_s32f_convert_16i_a.h \
 	volk_32f_s32f_convert_16i_u.h \
diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
new file mode 100644
index 000000000..00bdefbb5
--- /dev/null
+++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
@@ -0,0 +1,114 @@
+#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
+#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Calculates the magnitude squared of each value in complexVector and stores the results in magnitudeVector (SSE3, aligned loads/stores)
+    \param magnitudeVector The vector receiving the real output values; written with _mm_store_ps, so it must meet that intrinsic's alignment requirement
+    \param complexVector The vector containing the complex input values as interleaved real/imaginary floats; read with _mm_load_ps, same alignment requirement
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4;
+
+    const float* complexVectorPtr = (float*)complexVector;
+    float* magnitudeVectorPtr = magnitudeVector;
+
+    __m128 cplxValue1, cplxValue2, result;
+    for(;number < quarterPoints; number++){ // four complex values per iteration
+      cplxValue1 = _mm_load_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue2 = _mm_load_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square I and Q of the first two complex values
+      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square I and Q of the last two complex values
+
+      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Pairwise add: I^2 + Q^2 for all four values
+
+      _mm_store_ps(magnitudeVectorPtr, result);
+      magnitudeVectorPtr += 4;
+    }
+
+    number = quarterPoints * 4; // resume where the vector loop stopped
+    for(; number < num_points; number++){ // scalar loop for the remaining num_points % 4 values
+      float val1Real = *complexVectorPtr++;
+      float val1Imag = *complexVectorPtr++;
+      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+  /*!
+    \brief Calculates the magnitude squared of each value in complexVector and stores the results in magnitudeVector (SSE, aligned loads/stores)
+    \param magnitudeVector The vector receiving the real output values; written with _mm_store_ps, so it must meet that intrinsic's alignment requirement
+    \param complexVector The vector containing the complex input values as interleaved real/imaginary floats; read with _mm_load_ps, same alignment requirement
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4;
+    
+    const float* complexVectorPtr = (float*)complexVector;
+    float* magnitudeVectorPtr = magnitudeVector;
+
+    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+    for(;number < quarterPoints; number++){ // four complex values per iteration
+      cplxValue1 = _mm_load_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue2 = _mm_load_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      // Gather the four real parts: i1i2i3i4
+      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+      // Gather the four imaginary parts: q1q2q3q4
+      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+      qValue = _mm_mul_ps(qValue, qValue); // Square the Q values
+
+      result = _mm_add_ps(iValue, qValue); // I^2 + Q^2 for all four values
+
+      _mm_store_ps(magnitudeVectorPtr, result);
+      magnitudeVectorPtr += 4;
+    }
+
+    number = quarterPoints * 4; // resume where the vector loop stopped
+    for(; number < num_points; number++){ // scalar loop for the remaining num_points % 4 values
+       float val1Real = *complexVectorPtr++;
+       float val1Imag = *complexVectorPtr++;
+      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Calculates the magnitude squared of each value in complexVector and stores the results in magnitudeVector (plain C, no SIMD intrinsics)
+    \param magnitudeVector The vector receiving the real output values, one float per complex input
+    \param complexVector The vector containing the complex input values, read as interleaved real/imaginary floats
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+  const float* complexVectorPtr = (float*)complexVector;
+  float* magnitudeVectorPtr = magnitudeVector;
+  unsigned int number = 0;
+  for(number = 0; number < num_points; number++){
+    const float real = *complexVectorPtr++;
+    const float imag = *complexVectorPtr++;
+    *magnitudeVectorPtr++ = (real*real) + (imag*imag); // |z|^2 = re^2 + im^2, no square root
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
new file mode 100644
index 000000000..6eb4a523a
--- /dev/null
+++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
@@ -0,0 +1,114 @@
+#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
+#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+    \param complexVector The vector containing the complex input values
+    \param magnitudeVector The vector containing the real output values
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4;
+
+    const float* complexVectorPtr = (float*)complexVector;
+    float* magnitudeVectorPtr = magnitudeVector;
+
+    __m128 cplxValue1, cplxValue2, result;
+    for(;number < quarterPoints; number++){
+      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+      _mm_storeu_ps(magnitudeVectorPtr, result);
+      magnitudeVectorPtr += 4;
+    }
+
+    number = quarterPoints * 4;
+    for(; number < num_points; number++){
+      float val1Real = *complexVectorPtr++;
+      float val1Imag = *complexVectorPtr++;
+      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+  /*!
+    \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+    \param complexVector The vector containing the complex input values
+    \param magnitudeVector The vector containing the real output values
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4;
+    
+    const float* complexVectorPtr = (float*)complexVector;
+    float* magnitudeVectorPtr = magnitudeVector;
+
+    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+    for(;number < quarterPoints; number++){
+      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+      complexVectorPtr += 4;
+
+      // Arrange in i1i2i3i4 format
+      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+      // Arrange in q1q2q3q4 format
+      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+      qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+
+      result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+
+      _mm_storeu_ps(magnitudeVectorPtr, result);
+      magnitudeVectorPtr += 4;
+    }
+
+    number = quarterPoints * 4;
+    for(; number < num_points; number++){
+       float val1Real = *complexVectorPtr++;
+       float val1Imag = *complexVectorPtr++;
+      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+    \param complexVector The vector containing the complex input values
+    \param magnitudeVector The vector containing the real output values
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+static inline void volk_32fc_magnitude_squared_32f_u_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+  const float* complexVectorPtr = (float*)complexVector;
+  float* magnitudeVectorPtr = magnitudeVector;
+  unsigned int number = 0;
+  for(number = 0; number < num_points; number++){
+    const float real = *complexVectorPtr++;
+    const float imag = *complexVectorPtr++;
+    *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
-- 
cgit 


From 812144f4c153dca385b65e79401c9f1d88b90173 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 29 Jan 2012 17:34:06 -0500
Subject: core: switched complex to mag and mag_squared to use Volk functions.

---
 gnuradio-core/src/lib/general/gr_complex_to_xxx.cc | 25 +++++++++++++++-------
 gnuradio-core/src/lib/general/gr_complex_to_xxx.h  |  5 +++--
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
index a59c127f3..0bdd06547 100644
--- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
+++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2004,2008,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2008,2010,2012 Free Software Foundation, Inc.
  * 
  * This file is part of GNU Radio
  * 
@@ -27,6 +27,7 @@
 #include <gr_complex_to_xxx.h>
 #include <gr_io_signature.h>
 #include <gr_math.h>
+#include <volk/volk.h>
 
 // ----------------------------------------------------------------
 
@@ -152,6 +153,9 @@ gr_complex_to_mag::gr_complex_to_mag (unsigned int vlen)
 		   gr_make_io_signature (1, 1, sizeof (float) * vlen)),
     d_vlen(vlen)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -163,9 +167,9 @@ gr_complex_to_mag::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
-  for (int i = 0; i < noi; i++){
-    out[i] = std::abs (in[i]);
-  }
+  // turned out to be faster than aligned/unaligned switching
+  volk_32fc_magnitude_32f_u(out, in, noi);
+
   return noutput_items;
 }
 
@@ -183,6 +187,9 @@ gr_complex_to_mag_squared::gr_complex_to_mag_squared (unsigned int vlen)
 		   gr_make_io_signature (1, 1, sizeof (float) * vlen)),
     d_vlen(vlen)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -194,11 +201,13 @@ gr_complex_to_mag_squared::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
-  for (int i = 0; i < noi; i++){
-    const float __x = in[i].real();
-    const float __y = in[i].imag();
-    out[i] = __x * __x + __y * __y;
+  if(unaligned()) {
+    volk_32fc_magnitude_squared_32f_u(out, in, noi);
+  }
+  else {
+    volk_32fc_magnitude_squared_32f_a(out, in, noi);
   }
+
   return noutput_items;
 }
 
diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.h b/gnuradio-core/src/lib/general/gr_complex_to_xxx.h
index 166403259..232071323 100644
--- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.h
+++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.h
@@ -109,10 +109,11 @@ class GR_CORE_API gr_complex_to_imag : public gr_sync_block
  */
 class GR_CORE_API gr_complex_to_mag : public gr_sync_block
 {
-  friend GR_CORE_API gr_complex_to_mag_sptr gr_make_complex_to_mag (unsigned int vlen);
+  friend GR_CORE_API gr_complex_to_mag_sptr
+    gr_make_complex_to_mag (unsigned int vlen);
   gr_complex_to_mag (unsigned int vlen);
 
-  unsigned int	d_vlen;
+  unsigned int d_vlen;
 
  public:
   virtual int work (int noutput_items,
-- 
cgit 


From 83d0bf5d513e516d700e552fa2773dd852a6608a Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 29 Jan 2012 17:35:09 -0500
Subject: core: minor edits.

---
 gnuradio-core/src/lib/general/gr_char_to_short.h   | 1 +
 gnuradio-core/src/lib/runtime/gr_block_executor.cc | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.h b/gnuradio-core/src/lib/general/gr_char_to_short.h
index 8ed974acf..58f9a62b0 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_short.h
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.h
@@ -39,6 +39,7 @@ gr_make_char_to_short (size_t vlen=1);
 
 class GR_CORE_API gr_char_to_short : public gr_sync_block
 {
+ private:
   friend GR_CORE_API gr_char_to_short_sptr
     gr_make_char_to_short (size_t vlen);
   gr_char_to_short (size_t vlen);
diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index 3191246a7..02bb4873c 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -36,7 +36,7 @@
 #include <stdio.h>
 
 // must be defined to either 0 or 1
-#define ENABLE_LOGGING 1
+#define ENABLE_LOGGING 0
 
 #if (ENABLE_LOGGING)
 #define LOG(x) do { x; } while(0)
-- 
cgit 


From cb458204245632ac7a571bfe96b90d3c55a2f01a Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 30 Jan 2012 00:22:44 -0500
Subject: volk: adding complex to imag kernel.

---
 volk/include/volk/Makefile.am                      |  1 +
 .../volk/volk_32fc_deinterleave_imag_32f_a.h       | 68 ++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h

diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index c2502f6e6..c5b99c41b 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -65,6 +65,7 @@ volkinclude_HEADERS = \
 	volk_32fc_deinterleave_64f_x2_a.h \
 	volk_32fc_s32f_deinterleave_real_16i_a.h \
 	volk_32fc_deinterleave_real_32f_a.h \
+	volk_32fc_deinterleave_imag_32f_a.h \
 	volk_32fc_deinterleave_real_64f_a.h \
 	volk_32fc_x2_dot_prod_32fc_a.h \
 	volk_32fc_x2_dot_prod_32fc_u.h \
diff --git a/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
new file mode 100644
index 000000000..adc4112b9
--- /dev/null
+++ b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
@@ -0,0 +1,68 @@
+#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
+#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+  \brief Deinterleaves the complex vector into Q vector data
+  \param complexVector The complex input vector
+  \param qBuffer The Q buffer output data
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+  const unsigned int quarterPoints = num_points / 4;
+
+  const float* complexVectorPtr = (const float*)complexVector;
+  float* qBufferPtr = qBuffer;
+
+  __m128 cplxValue1, cplxValue2, iValue;
+  for(;number < quarterPoints; number++){
+      
+    cplxValue1 = _mm_load_ps(complexVectorPtr);
+    complexVectorPtr += 4;
+
+    cplxValue2 = _mm_load_ps(complexVectorPtr);
+    complexVectorPtr += 4;
+
+    // Arrange in q1q2q3q4 format
+    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+    _mm_store_ps(qBufferPtr, iValue);
+
+    qBufferPtr += 4;
+  }
+
+  number = quarterPoints * 4;
+  for(; number < num_points; number++){
+    complexVectorPtr++;
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Deinterleaves the complex vector into Q vector data
+  \param complexVector The complex input vector
+  \param qBuffer The Q buffer output data
+  \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_imag_32f_a_generic(float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+  unsigned int number = 0;
+  const float* complexVectorPtr = (float*)complexVector;
+  float* qBufferPtr = qBuffer;
+  for(number = 0; number < num_points; number++){
+    complexVectorPtr++;
+    *qBufferPtr++ = *complexVectorPtr++;
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
-- 
cgit 


From 4769d5ff75520661ce52fae229482a96e651f7e2 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 30 Jan 2012 00:23:11 -0500
Subject: core: complex_to_xxx (float, real, imag, arg) to volk.

---
 gnuradio-core/src/lib/general/gr_complex_to_xxx.cc | 60 +++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
index 0bdd06547..9ed86f368 100644
--- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
+++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
@@ -57,17 +57,41 @@ gr_complex_to_float::work (int noutput_items,
 
   switch (output_items.size ()){
   case 1:
+#if 1
+    if(is_unaligned()) {
+      for (int i = 0; i < noi; i++){
+	out0[i] = in[i].real ();
+      }
+    }
+    else {
+      volk_32fc_deinterleave_real_32f_a(out0, in, noi);
+    }
+#else
     for (int i = 0; i < noi; i++){
       out0[i] = in[i].real ();
     }
+#endif
     break;
 
   case 2:
+#if 1
+    out1 = (float *) output_items[1];
+    if(is_unaligned()) {
+      for (int i = 0; i < noi; i++){
+	out0[i] = in[i].real ();
+	out1[i] = in[i].imag ();
+      }
+    }
+    else {
+      volk_32fc_deinterleave_32f_x2_a(out0, out1, in, noi);
+    }
+#else
     out1 = (float *) output_items[1];
     for (int i = 0; i < noi; i++){
       out0[i] = in[i].real ();
       out1[i] = in[i].imag ();
     }
+#endif
     break;
 
   default:
@@ -102,9 +126,21 @@ gr_complex_to_real::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
+#if 1
+  if(is_unaligned()) {
+    for (int i = 0; i < noi; i++){
+      out[i] = in[i].real ();
+    }
+  }
+  else {
+    volk_32fc_deinterleave_real_32f_a(out, in, noi);
+  }
+#else
   for (int i = 0; i < noi; i++){
-    out[i] = in[i].real ();
+    out0[i] = in[i].real ();
   }
+#endif
+  
   return noutput_items;
 }
 
@@ -133,9 +169,20 @@ gr_complex_to_imag::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
+#if 1
+  if(is_unaligned()) {
+    for (int i = 0; i < noi; i++){
+      out[i] = in[i].imag ();
+    }
+  }
+  else {
+    volk_32fc_deinterleave_imag_32f_a(out, in, noi);
+  }
+#else
   for (int i = 0; i < noi; i++){
     out[i] = in[i].imag ();
   }
+#endif
   return noutput_items;
 }
 
@@ -236,9 +283,20 @@ gr_complex_to_arg::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
+#if 1
+  if(is_unaligned()) {
+    for (int i = 0; i < noi; i++){
+      out[i] = gr_fast_atan2f(in[i]);
+    }
+  }
+  else {
+    volk_32fc_s32f_atan2_32f_a(out, in, 1, noi);
+  }
+#else
   for (int i = 0; i < noi; i++){
     //    out[i] = std::arg (in[i]);
     out[i] = gr_fast_atan2f(in[i]);
   }
+#endif
   return noutput_items;
 }
-- 
cgit 


From 046385126d92cf9179ac84ede06d3d50e1c9030f Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 30 Jan 2012 12:12:44 -0500
Subject: core: fixing up complex_to_xxx for using Volk where appropriate.
 Speed benchmark were used to decide which implementation to use.

---
 gnuradio-core/src/lib/general/gr_complex_to_xxx.cc | 54 +++++++---------------
 .../src/python/gnuradio/gr/qa_complex_to_xxx.py    |  2 +-
 2 files changed, 17 insertions(+), 39 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
index 9ed86f368..108a92835 100644
--- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
+++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
@@ -43,6 +43,9 @@ gr_complex_to_float::gr_complex_to_float (unsigned int vlen)
 		   gr_make_io_signature (1, 2, sizeof (float) * vlen)),
     d_vlen(vlen)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -57,7 +60,6 @@ gr_complex_to_float::work (int noutput_items,
 
   switch (output_items.size ()){
   case 1:
-#if 1
     if(is_unaligned()) {
       for (int i = 0; i < noi; i++){
 	out0[i] = in[i].real ();
@@ -66,15 +68,9 @@ gr_complex_to_float::work (int noutput_items,
     else {
       volk_32fc_deinterleave_real_32f_a(out0, in, noi);
     }
-#else
-    for (int i = 0; i < noi; i++){
-      out0[i] = in[i].real ();
-    }
-#endif
     break;
 
   case 2:
-#if 1
     out1 = (float *) output_items[1];
     if(is_unaligned()) {
       for (int i = 0; i < noi; i++){
@@ -85,13 +81,6 @@ gr_complex_to_float::work (int noutput_items,
     else {
       volk_32fc_deinterleave_32f_x2_a(out0, out1, in, noi);
     }
-#else
-    out1 = (float *) output_items[1];
-    for (int i = 0; i < noi; i++){
-      out0[i] = in[i].real ();
-      out1[i] = in[i].imag ();
-    }
-#endif
     break;
 
   default:
@@ -115,6 +104,9 @@ gr_complex_to_real::gr_complex_to_real (unsigned int vlen)
 		   gr_make_io_signature (1, 1, sizeof (float) * vlen)),
     d_vlen(vlen)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -126,7 +118,6 @@ gr_complex_to_real::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
-#if 1
   if(is_unaligned()) {
     for (int i = 0; i < noi; i++){
       out[i] = in[i].real ();
@@ -135,11 +126,6 @@ gr_complex_to_real::work (int noutput_items,
   else {
     volk_32fc_deinterleave_real_32f_a(out, in, noi);
   }
-#else
-  for (int i = 0; i < noi; i++){
-    out0[i] = in[i].real ();
-  }
-#endif
   
   return noutput_items;
 }
@@ -158,6 +144,9 @@ gr_complex_to_imag::gr_complex_to_imag (unsigned int vlen)
 		   gr_make_io_signature (1, 1, sizeof (float) * vlen)),
     d_vlen(vlen)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -169,7 +158,6 @@ gr_complex_to_imag::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
-#if 1
   if(is_unaligned()) {
     for (int i = 0; i < noi; i++){
       out[i] = in[i].imag ();
@@ -178,11 +166,7 @@ gr_complex_to_imag::work (int noutput_items,
   else {
     volk_32fc_deinterleave_imag_32f_a(out, in, noi);
   }
-#else
-  for (int i = 0; i < noi; i++){
-    out[i] = in[i].imag ();
-  }
-#endif
+
   return noutput_items;
 }
 
@@ -248,7 +232,7 @@ gr_complex_to_mag_squared::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
-  if(unaligned()) {
+  if(is_unaligned()) {
     volk_32fc_magnitude_squared_32f_u(out, in, noi);
   }
   else {
@@ -272,6 +256,9 @@ gr_complex_to_arg::gr_complex_to_arg (unsigned int vlen)
 		   gr_make_io_signature (1, 1, sizeof (float) * vlen)),
     d_vlen(vlen)
 {
+  const int alignment_multiple =
+    volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple);
 }
 
 int
@@ -283,20 +270,11 @@ gr_complex_to_arg::work (int noutput_items,
   float *out = (float *) output_items[0];
   int noi = noutput_items * d_vlen;
 
-#if 1
-  if(is_unaligned()) {
-    for (int i = 0; i < noi; i++){
-      out[i] = gr_fast_atan2f(in[i]);
-    }
-  }
-  else {
-    volk_32fc_s32f_atan2_32f_a(out, in, 1, noi);
-  }
-#else
+  // The fast_atan2f is faster than Volk
   for (int i = 0; i < noi; i++){
     //    out[i] = std::arg (in[i]);
     out[i] = gr_fast_atan2f(in[i]);
   }
-#endif
+
   return noutput_items;
 }
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py b/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py
index 76627247b..01679dc05 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py
@@ -134,7 +134,7 @@ class test_complex_ops (gr_unittest.TestCase):
         self.tb.run ()
         actual_result = dst.data ()
 
-        self.assertFloatTuplesAlmostEqual (expected_result, actual_result, 5)
+        self.assertFloatTuplesAlmostEqual (expected_result, actual_result, 3)
 
 
 if __name__ == '__main__':
-- 
cgit 


From 6385380c812a8cd1470073d01c7e8b6006b8f398 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 30 Jan 2012 18:50:29 -0500
Subject: core: redo fft_filter (complex and float) with Volk.

No need for sse implementation now but keeping code for reference.
---
 gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc          | 5 -----
 gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc          | 7 +------
 gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc | 5 ++---
 gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc | 5 ++---
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
index 9fa98cc69..d523404c9 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
@@ -30,7 +30,6 @@
 #endif
 
 #include <gr_fft_filter_ccc.h>
-//#include <gri_fft_filter_ccc_sse.h>
 #include <gri_fft_filter_ccc_generic.h>
 #include <gr_io_signature.h>
 #include <gri_fft.h>
@@ -57,11 +56,7 @@ gr_fft_filter_ccc::gr_fft_filter_ccc (int decimation, const std::vector<gr_compl
     d_updated(false)
 {
   set_history(1);
-#if 1 // don't enable the sse version until handling it is worked out
   d_filter = new gri_fft_filter_ccc_generic(decimation, taps);
-#else
-  d_filter = new gri_fft_filter_ccc_sse(decimation, taps);
-#endif
   d_new_taps = taps;
   d_nsamples = d_filter->set_taps(taps);
   set_output_multiple(d_nsamples);
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
index c0a9b3483..640851a1d 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
@@ -26,7 +26,6 @@
 
 #include <gr_fft_filter_fff.h>
 #include <gri_fft_filter_fff_generic.h>
-//#include <gri_fft_filter_fff_sse.h>
 #include <gr_io_signature.h>
 #include <assert.h>
 #include <stdexcept>
@@ -50,11 +49,7 @@ gr_fft_filter_fff::gr_fft_filter_fff (int decimation, const std::vector<float> &
 {
   set_history(1);
   
-#if 1 // don't enable the sse version until handling it is worked out
-    d_filter = new gri_fft_filter_fff_generic(decimation, taps);
-#else
-    d_filter = new gri_fft_filter_fff_sse(decimation, taps);
-#endif
+  d_filter = new gri_fft_filter_fff_generic(decimation, taps);
   d_new_taps = taps;
   d_nsamples = d_filter->set_taps(taps);
   set_output_multiple(d_nsamples);
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
index 891905dd0..d9700ad2e 100644
--- a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
@@ -26,6 +26,7 @@
 
 #include <gri_fft_filter_ccc_generic.h>
 #include <gri_fft.h>
+#include <volk/volk.h>
 #include <assert.h>
 #include <stdexcept>
 #include <cstdio>
@@ -137,9 +138,7 @@ gri_fft_filter_ccc_generic::filter (int nitems, const gr_complex *input, gr_comp
     gr_complex *b = &d_xformed_taps[0];
     gr_complex *c = d_invfft->get_inbuf();
 
-    for (j = 0; j < d_fftsize; j+=1) {	// filter in the freq domain
-      c[j] = a[j] * b[j];
-    } 
+    volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize);
     
     d_invfft->execute();	// compute inv xform
 
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
index b3fbe1d1a..64705ee5e 100644
--- a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
@@ -26,6 +26,7 @@
 
 #include <gri_fft_filter_fff_generic.h>
 #include <gri_fft.h>
+#include <volk/volk.h>
 #include <assert.h>
 #include <stdexcept>
 #include <cstdio>
@@ -124,9 +125,7 @@ gri_fft_filter_fff_generic::filter (int nitems, const float *input, float *outpu
     gr_complex *b = &d_xformed_taps[0];
     gr_complex *c = d_invfft->get_inbuf();
 
-    for (j = 0; j < d_fftsize/2+1; j++) {	// filter in the freq domain
-      c[j] = a[j] * b[j];
-    }      
+    volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize/2+1);
    
     d_invfft->execute();	// compute inv xform
 
-- 
cgit 


From 070a6c9ce93c2f2043a0145e253cc55f801f9433 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 14:25:53 -0500
Subject: volk: adding unaligned versions of complex multiply a constant and
 complex multiply 2 streams.

---
 .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 42 ++++++++++++
 volk/include/volk/volk_32fc_x2_multiply_32fc_u.h   | 77 ++++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
 create mode 100644 volk/include/volk/volk_32fc_x2_multiply_32fc_u.h

diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
new file mode 100644
index 000000000..201dcf5f6
--- /dev/null
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -0,0 +1,42 @@
+#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
+#define INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Multiplies the input complex vector by a complex scalar and stores the results in the output vector
+    \param cVector The vector where the results will be stored
+    \param aVector The vector to be multiplied
+    \param scalar The complex scalar that each element of aVector is multiplied by
+    \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+  */
+static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    unsigned int number = num_points;
+
+    // unrolled loop: process 8 values per iteration
+    while (number >= 8){
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      number -= 8;
+    }
+
+    // clean up any remaining
+    while (number-- > 0)
+      *cPtr++ = *aPtr++ * scalar;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H */
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
new file mode 100644
index 000000000..729c1a4ad
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
@@ -0,0 +1,77 @@
+#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H
+#define INCLUDED_volk_32fc_x2_multiply_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Multiplies the two input complex vectors and stores their results in the third vector
+    \param cVector The vector where the results will be stored
+    \param aVector One of the vectors to be multiplied
+    \param bVector One of the vectors to be multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+static inline void volk_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+  unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x, y, yl, yh, z, tmp1, tmp2;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+    const lv_32fc_t* b = bVector;
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+      y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+      
+      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+      
+      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+      
+      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+      
+      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+      
+      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+   
+      _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+      a += 2;
+      b += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = (*a) * (*b);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Multiplies the two input complex vectors and stores their results in the third vector
+    \param cVector The vector where the results will be stored
+    \param aVector One of the vectors to be multiplied
+    \param bVector One of the vectors to be multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+static inline void volk_32fc_x2_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    const lv_32fc_t* bPtr=  bVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = (*aPtr++) * (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */
-- 
cgit 


From c6519775ac7d1e9098ca2fb20c09e15ae8e60a4a Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 16:34:23 -0500
Subject: sched: better working alignment handling.

Works with max_noutput_items and set_output_multiple.
---
 gnuradio-core/src/lib/runtime/gr_block_executor.cc | 53 ++++++++++------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index 02bb4873c..11c6639b3 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -83,11 +83,6 @@ min_available_space (gr_block_detail *d, int output_multiple)
       }
       return 0;
     }
-    else if (n > output_multiple) {
-      // adjust this or we often ask for too many, 
-      // causing a re-calc for fewer items.
-      n = n-output_multiple;
-    }
     min_space = std::min (min_space, n);
   }
   return min_space;
@@ -316,7 +311,7 @@ gr_block_executor::run_one_iteration()
       
       // only test this if we specifically set the output_multiple
       if(m->output_multiple_set())
-	reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple());
+	reqd_noutput_items = round_down(reqd_noutput_items, m->output_multiple());
 
       if (reqd_noutput_items > 0 && reqd_noutput_items <= noutput_items)
 	noutput_items = reqd_noutput_items;
@@ -329,32 +324,34 @@ gr_block_executor::run_one_iteration()
     // Check if we're still unaligned; use up items until we're
     // aligned again. Otherwise, make sure we set the alignment
     // requirement.
-    if(m->is_unaligned()) {
-      // When unaligned, don't just set noutput_items to the remaining
-      // samples to meet alignment; this causes too much overhead in
-      // requiring a premature call back here. Set the maximum amount
-      // of samples to handle unalignment and get us back aligned.
-      if(noutput_items >= m->unaligned()) {
-	noutput_items = round_up(noutput_items, m->alignment()) \
-	  - (m->alignment() - m->unaligned());
-	new_alignment = 0;
+    if(!m->output_multiple_set()) {
+      if(m->is_unaligned()) {
+	// When unaligned, don't just set noutput_items to the remaining
+	// samples to meet alignment; this causes too much overhead in
+	// requiring a premature call back here. Set the maximum amount
+	// of samples to handle unalignment and get us back aligned.
+	if(noutput_items >= m->unaligned()) {
+	  noutput_items = round_up(noutput_items, m->alignment())	\
+	    - (m->alignment() - m->unaligned());
+	  new_alignment = 0;
+	}
+	else {
+	  new_alignment = m->unaligned() - noutput_items;
+	}
+      }
+      else if(noutput_items < m->alignment()) {
+	// if we don't have enough for an aligned call, keep track of
+	// misalignment, set unaligned flag, and proceed.
+	new_alignment = m->alignment() - noutput_items;
+	m->set_unaligned(new_alignment);
+	m->set_is_unaligned(true);
       }
       else {
-	new_alignment = m->unaligned() - noutput_items;
+	// enough to round down to the nearest alignment and process.
+	noutput_items = round_down(noutput_items, m->alignment());
+	m->set_is_unaligned(false);
       }
     }
-    else if(noutput_items < m->alignment()) {
-      // if we don't have enough for an aligned call, keep track of
-      // misalignment, set unaligned flag, and proceed.
-      new_alignment = m->alignment() - noutput_items;
-      m->set_unaligned(new_alignment);
-      m->set_is_unaligned(true);
-    }
-    else {
-      // enough to round down to the nearest alignment and process.
-      noutput_items = round_down(noutput_items, m->alignment());
-      m->set_is_unaligned(false);
-    }
 
     // ask the block how much input they need to produce noutput_items
     m->forecast (noutput_items, d_ninput_items_required);
-- 
cgit 


From 67d23bdecd3f15197bdf46f8c0cd66a6f754fea5 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 16:35:14 -0500
Subject: volk: improving performance of multiply_const and multiply two
 streams.

---
 .../include/volk/volk_32fc_s32fc_multiply_32fc_a.h | 63 +++++++++++++++++++++-
 .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 45 ++++++++++++++++
 volk/include/volk/volk_32fc_x2_multiply_32fc_a.h   |  1 -
 3 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
index b27a7259f..205461afb 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
@@ -6,6 +6,52 @@
 #include <volk/volk_complex.h>
 #include <float.h>
 
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Multiplies the input complex vector by a complex scalar and stores the results in the output vector
+    \param cVector The vector where the results will be stored
+    \param aVector The input vector to be multiplied
+    \param scalar The complex scalar that each point of aVector is multiplied by
+    \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+  */
+static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+  unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x, yl, yh, z, tmp1, tmp2;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+
+    // Set up constant scalar vector
+    yl = _mm_set_ps1(lv_creal(scalar));
+    yh = _mm_set_ps1(lv_cimag(scalar));
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+      
+      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*sr,ai*sr,br*sr,bi*sr (sr = real part of scalar)
+      
+      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+      
+      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*si,ar*si,bi*si,br*si (si = imaginary part of scalar)
+      
+      z = _mm_addsub_ps(tmp1,tmp2); // ar*sr-ai*si, ai*sr+ar*si, br*sr-bi*si, bi*sr+br*si
+    
+      _mm_store_ps((float*)c,z); // Store the results back into the C container
+
+      a += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = (*a) * scalar;
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+
 #ifdef LV_HAVE_GENERIC
   /*!
     \brief Multiplies the two input complex vectors and stores their results in the third vector
@@ -17,11 +63,24 @@
 static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
     lv_32fc_t* cPtr = cVector;
     const lv_32fc_t* aPtr = aVector;
-    unsigned int number = 0;
+    unsigned int number = num_points;
 
-    for(number = 0; number < num_points; number++){
+    // unrolled loop: process 8 points per iteration
+    while (number >= 8){
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
       *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      *cPtr++ = (*aPtr++) * scalar;
+      number -= 8;
     }
+
+    // clean up any remaining
+    while (number-- > 0)
+      *cPtr++ = *aPtr++ * scalar;
 }
 #endif /* LV_HAVE_GENERIC */
 
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
index 201dcf5f6..a9dfcda19 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -6,6 +6,51 @@
 #include <volk/volk_complex.h>
 #include <float.h>
 
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Multiplies the input complex vector by a complex scalar and stores the results in the output vector
+    \param cVector The vector where the results will be stored
+    \param aVector The input vector to be multiplied
+    \param scalar The complex scalar that each point of aVector is multiplied by
+    \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+  */
+static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+  unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x, yl, yh, z, tmp1, tmp2;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+
+    // Set up constant scalar vector
+    yl = _mm_set_ps1(lv_creal(scalar));
+    yh = _mm_set_ps1(lv_cimag(scalar));
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+      
+      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*sr,ai*sr,br*sr,bi*sr (sr = real part of scalar)
+      
+      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+      
+      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*si,ar*si,bi*si,br*si (si = imaginary part of scalar)
+      
+      z = _mm_addsub_ps(tmp1,tmp2); // ar*sr-ai*si, ai*sr+ar*si, br*sr-bi*si, bi*sr+br*si
+    
+      _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+      a += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = (*a) * scalar;
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
 #ifdef LV_HAVE_GENERIC
   /*!
     \brief Multiplies the two input complex vectors and stores their results in the third vector
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
index 18dd092e8..aec8bd716 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
@@ -23,7 +23,6 @@ static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const l
     lv_32fc_t* c = cVector;
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
-
     for(;number < halfPoints; number++){
       
       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-- 
cgit 


From e8089db25b2e28824f11d27c9d98a4adef191736 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 16:36:47 -0500
Subject: core: moving multiply_cc and multiply_const_cc out of gengen and into
 general so they can make use of volk calls.

QA code now explicitly tests the cc versions of these blocks.
---
 gnuradio-core/src/lib/general/.gitignore           |  6 --
 gnuradio-core/src/lib/general/CMakeLists.txt       |  1 +
 gnuradio-core/src/lib/general/general.i            |  2 +
 gnuradio-core/src/lib/general/gr_multiply_cc.cc    | 69 ++++++++++++++++++
 gnuradio-core/src/lib/general/gr_multiply_cc.h     | 56 +++++++++++++++
 gnuradio-core/src/lib/general/gr_multiply_cc.i     | 32 +++++++++
 .../src/lib/general/gr_multiply_const_cc.cc        | 82 ++++++++++++++++++++++
 .../src/lib/general/gr_multiply_const_cc.h         | 60 ++++++++++++++++
 .../src/lib/general/gr_multiply_const_cc.i         | 33 +++++++++
 gnuradio-core/src/lib/gengen/.gitignore            |  6 --
 gnuradio-core/src/lib/gengen/CMakeLists.txt        |  4 +-
 gnuradio-core/src/lib/gengen/Makefile.gen          |  6 --
 gnuradio-core/src/lib/gengen/generate_common.py    |  6 +-
 .../src/python/gnuradio/gr/qa_add_and_friends.py   | 20 ++++++
 14 files changed, 360 insertions(+), 23 deletions(-)
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_cc.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_cc.h
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_cc.i
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_cc.h
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_cc.i

diff --git a/gnuradio-core/src/lib/general/.gitignore b/gnuradio-core/src/lib/general/.gitignore
index 4f3696f58..349651e0c 100644
--- a/gnuradio-core/src/lib/general/.gitignore
+++ b/gnuradio-core/src/lib/general/.gitignore
@@ -158,12 +158,6 @@
 /gr_divide_ss.cc
 /gr_divide_ss.h
 /gr_divide_ss.i
-/gr_multiply_cc.cc
-/gr_multiply_cc.h
-/gr_multiply_cc.i
-/gr_multiply_const_cc.cc
-/gr_multiply_const_cc.h
-/gr_multiply_const_cc.i
 /gr_multiply_const_ff.cc
 /gr_multiply_const_ff.h
 /gr_multiply_const_ff.i
diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 6dc9d411d..6afdfe27c 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -231,6 +231,7 @@ set(gr_core_general_triple_threats
     gr_lfsr_32k_source_s
     gr_map_bb
     gr_multiply_cc
+    gr_multiply_const_cc
     gr_nlog10_ff
     gr_nop
     gr_null_sink
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index ec90e40e5..7de13258e 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -107,6 +107,7 @@
 #include <gr_framer_sink_1.h>
 #include <gr_map_bb.h>
 #include <gr_multiply_cc.h>
+#include <gr_multiply_const_cc.h>
 #include <gr_feval.h>
 #include <gr_pwr_squelch_cc.h>
 #include <gr_pwr_squelch_ff.h>
@@ -224,6 +225,7 @@
 %include "gr_framer_sink_1.i"
 %include "gr_map_bb.i"
 %include "gr_multiply_cc.i"
+%include "gr_multiply_const_cc.i"
 %include "gr_feval.i"
 %include "gr_pwr_squelch_cc.i"
 %include "gr_pwr_squelch_ff.i"
diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_cc.cc
new file mode 100644
index 000000000..0d20e6257
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_cc.cc
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_cc.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_cc_sptr
+gr_make_multiply_cc (size_t vlen)
+{
+  return gnuradio::get_initial_sptr(new gr_multiply_cc (vlen));
+}
+
+gr_multiply_cc::gr_multiply_cc (size_t vlen)
+  : gr_sync_block ("gr_multiply_cc",
+		   gr_make_io_signature (1, -1, sizeof (gr_complex)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)),
+    d_vlen(vlen)
+{
+ const int alignment_multiple =
+   volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_multiply_cc::work (int noutput_items,
+		      gr_vector_const_void_star &input_items,
+		      gr_vector_void_star &output_items)
+{
+  gr_complex *out = (gr_complex *) output_items[0];
+  int noi = d_vlen*noutput_items;
+
+  memcpy(out, input_items[0], noi*sizeof(gr_complex));
+  if(is_unaligned()) {
+    for(size_t i = 1; i < input_items.size(); i++)
+      volk_32fc_x2_multiply_32fc_u(out, out, (gr_complex*)input_items[i], noi);
+  }
+  else {
+    for(size_t i = 1; i < input_items.size(); i++)
+      volk_32fc_x2_multiply_32fc_a(out, out, (gr_complex*)input_items[i], noi);
+  }
+  return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.h b/gnuradio-core/src/lib/general/gr_multiply_cc.h
new file mode 100644
index 000000000..f80ec8b25
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_cc.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CC_H
+#define INCLUDED_GR_MULTIPLY_CC_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_cc;
+typedef boost::shared_ptr<gr_multiply_cc> gr_multiply_cc_sptr;
+
+GR_CORE_API gr_multiply_cc_sptr
+gr_make_multiply_cc (size_t vlen=1);
+
+/*!
+ * \brief Multiply streams of complex values
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_cc : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_multiply_cc_sptr
+    gr_make_multiply_cc (size_t vlen);
+  gr_multiply_cc (size_t vlen);
+  
+  size_t d_vlen;
+
+ public:
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CC_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.i b/gnuradio-core/src/lib/general/gr_multiply_cc.i
new file mode 100644
index 000000000..61768c390
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_cc.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_cc)
+
+gr_multiply_cc_sptr
+gr_make_multiply_cc (size_t vlen=1);
+
+class gr_multiply_cc : public gr_sync_block
+{
+public:
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
new file mode 100644
index 000000000..e301ae8eb
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
@@ -0,0 +1,82 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_const_cc.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_const_cc_sptr
+gr_make_multiply_const_cc (gr_complex k, size_t vlen)
+{
+  return gnuradio::get_initial_sptr(new gr_multiply_const_cc (k, vlen));
+}
+
+gr_multiply_const_cc::gr_multiply_const_cc (gr_complex k, size_t vlen)
+  : gr_sync_block ("gr_multiply_const_cc",
+		   gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)),
+    d_k(k), d_vlen(vlen)
+{
+ const int alignment_multiple =
+   volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
+}
+
+gr_complex
+gr_multiply_const_cc::k() const
+{
+  return d_k;
+}
+
+void
+gr_multiply_const_cc::set_k(gr_complex k)
+{
+  d_k = k;
+}
+
+#include <cstdio>
+
+int
+gr_multiply_const_cc::work (int noutput_items,
+			    gr_vector_const_void_star &input_items,
+			    gr_vector_void_star &output_items)
+{
+  const gr_complex *in = (const gr_complex *) input_items[0];
+  gr_complex *out = (gr_complex *) output_items[0];
+  int noi = d_vlen*noutput_items;
+
+  if(is_unaligned()) {
+    volk_32fc_s32fc_multiply_32fc_u(out, in, d_k, noi);
+  }
+  else {
+    volk_32fc_s32fc_multiply_32fc_a(out, in, d_k, noi);
+  }
+
+  return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.h b/gnuradio-core/src/lib/general/gr_multiply_const_cc.h
new file mode 100644
index 000000000..1791d9160
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CONST_CC_H
+#define INCLUDED_GR_MULTIPLY_CONST_CC_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_const_cc;
+typedef boost::shared_ptr<gr_multiply_const_cc> gr_multiply_const_cc_sptr;
+
+GR_CORE_API gr_multiply_const_cc_sptr
+gr_make_multiply_const_cc (gr_complex k, size_t vlen=1);
+
+/*!
+ * \brief Multiply stream of complex values with a constant \p k
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_const_cc : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_multiply_const_cc_sptr
+    gr_make_multiply_const_cc (gr_complex k, size_t vlen);
+  gr_multiply_const_cc (gr_complex k, size_t vlen);
+
+  gr_complex d_k;
+  size_t d_vlen;
+
+ public:
+  gr_complex k() const;
+  void set_k(gr_complex k);
+
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CONST_CC_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.i b/gnuradio-core/src/lib/general/gr_multiply_const_cc.i
new file mode 100644
index 000000000..be8d32b31
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.i
@@ -0,0 +1,33 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_const_cc)
+
+gr_multiply_const_cc_sptr
+gr_make_multiply_const_cc (gr_complex k, size_t vlen=1);
+
+class gr_multiply_const_cc : public gr_sync_block
+{
+public:
+  gr_complex k() const;
+  void set_k(gr_complex k);
+};
diff --git a/gnuradio-core/src/lib/gengen/.gitignore b/gnuradio-core/src/lib/gengen/.gitignore
index ecd4cb0d5..4422ae0dd 100644
--- a/gnuradio-core/src/lib/gengen/.gitignore
+++ b/gnuradio-core/src/lib/gengen/.gitignore
@@ -202,12 +202,6 @@
 /gr_max_ss.cc
 /gr_max_ss.h
 /gr_max_ss.i
-/gr_multiply_cc.cc
-/gr_multiply_cc.h
-/gr_multiply_cc.i
-/gr_multiply_const_cc.cc
-/gr_multiply_const_cc.h
-/gr_multiply_const_cc.i
 /gr_multiply_const_ff.cc
 /gr_multiply_const_ff.h
 /gr_multiply_const_ff.i
diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt
index a7292f131..53dc9ce15 100644
--- a/gnuradio-core/src/lib/gengen/CMakeLists.txt
+++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt
@@ -86,10 +86,10 @@ expand_h_cc_i(gr_noise_source_X      s i f c)
 expand_h_cc_i(gr_sig_source_X        s i f c)
 
 expand_h_cc_i(gr_add_const_XX           ss ii ff cc sf)
-expand_h_cc_i(gr_multiply_const_XX      ss ii ff cc)
+expand_h_cc_i(gr_multiply_const_XX      ss ii ff)
 expand_h_cc_i(gr_add_XX                 ss ii ff cc)
 expand_h_cc_i(gr_sub_XX                 ss ii ff cc)
-expand_h_cc_i(gr_multiply_XX            ss ii ff cc)
+expand_h_cc_i(gr_multiply_XX            ss ii ff)
 expand_h_cc_i(gr_divide_XX              ss ii ff cc)
 expand_h_cc_i(gr_mute_XX                ss ii ff cc)
 expand_h_cc_i(gr_add_const_vXX          ss ii ff cc)
diff --git a/gnuradio-core/src/lib/gengen/Makefile.gen b/gnuradio-core/src/lib/gengen/Makefile.gen
index 1c529803c..fb7b21e24 100644
--- a/gnuradio-core/src/lib/gengen/Makefile.gen
+++ b/gnuradio-core/src/lib/gengen/Makefile.gen
@@ -45,8 +45,6 @@ GENERATED_H = \
 	gr_moving_average_ff.h \
 	gr_moving_average_ii.h \
 	gr_moving_average_ss.h \
-	gr_multiply_cc.h \
-	gr_multiply_const_cc.h \
 	gr_multiply_const_ff.h \
 	gr_multiply_const_ii.h \
 	gr_multiply_const_ss.h \
@@ -150,8 +148,6 @@ GENERATED_I = \
 	gr_moving_average_ff.i \
 	gr_moving_average_ii.i \
 	gr_moving_average_ss.i \
-	gr_multiply_cc.i \
-	gr_multiply_const_cc.i \
 	gr_multiply_const_ff.i \
 	gr_multiply_const_ii.i \
 	gr_multiply_const_ss.i \
@@ -255,8 +251,6 @@ GENERATED_CC = \
 	gr_moving_average_ff.cc \
 	gr_moving_average_ii.cc \
 	gr_moving_average_ss.cc \
-	gr_multiply_cc.cc \
-	gr_multiply_const_cc.cc \
 	gr_multiply_const_ff.cc \
 	gr_multiply_const_ii.cc \
 	gr_multiply_const_ss.cc \
diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py
index 9bd6bcc9c..1c2c064c1 100755
--- a/gnuradio-core/src/lib/gengen/generate_common.py
+++ b/gnuradio-core/src/lib/gengen/generate_common.py
@@ -41,10 +41,8 @@ reg_signatures = ['ss', 'ii', 'ff', 'cc']
 
 reg_roots = [
     'gr_add_const_XX',
-    'gr_multiply_const_XX',
     'gr_add_XX',
     'gr_sub_XX',
-    'gr_multiply_XX',
     'gr_divide_XX',
     'gr_mute_XX',
     'gr_add_const_vXX',
@@ -66,7 +64,9 @@ others = (
     ('gr_sample_and_hold_XX',       ('bb','ss','ii','ff')),
     ('gr_argmax_XX',                ('fs','is','ss')),
     ('gr_max_XX',                   ('ff','ii','ss')),
-    ('gr_peak_detector_XX',         ('fb','ib','sb'))
+    ('gr_peak_detector_XX',         ('fb','ib','sb')),
+    ('gr_multiply_XX',              ('ss','ii','ff')),
+    ('gr_multiply_const_XX',        ('ss','ii','ff'))
     )
 
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
index 8fb70fb3f..c1d8dafd1 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
@@ -78,6 +78,18 @@ class test_add_and_friends (gr_unittest.TestCase):
         op = gr.multiply_const_ii (5)
         self.help_ii ((src_data,), expected_result, op)
 
+    def test_mult_const_cc (self):
+        src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j)
+        expected_result = (-5-5j, 0+0j, 5+5j, 10+10j, 15+15j)
+        op = gr.multiply_const_cc (5)
+        self.help_cc ((src_data,), expected_result, op)
+
+    def test_mult_const_cc2 (self):
+        src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j)
+        expected_result = (-3-7j, 0+0j, 3+7j, 6+14j, 9+21j)
+        op = gr.multiply_const_cc (5+2j)
+        self.help_cc ((src_data,), expected_result, op)
+
     def test_add_ii (self):
         src1_data = (1,  2, 3, 4, 5)
         src2_data = (8, -3, 4, 8, 2)
@@ -94,6 +106,14 @@ class test_add_and_friends (gr_unittest.TestCase):
         self.help_ii ((src1_data, src2_data),
                       expected_result, op)
 
+    def test_mult_cc (self):
+        src1_data = (1+1j,  2+2j, 3+3j, 4+4j, 5+5j)
+        src2_data = (8, -3, 4, 8, 2)
+        expected_result = (8+8j, -6-6j, 12+12j, 32+32j, 10+10j)
+        op = gr.multiply_cc ()
+        self.help_cc ((src1_data, src2_data),
+                      expected_result, op)
+
     def test_sub_ii_1 (self):
         src1_data = (1,  2, 3, 4, 5)
         expected_result = (-1, -2, -3, -4, -5)
-- 
cgit 


From c1927c72bbae3cf6fe96e3d20f3e7bb74469539a Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 16:37:53 -0500
Subject: volk: adding new Volk functions added in this branch to the
 volk_profile tool and installing it into $prefix/bin.

---
 volk/apps/CMakeLists.txt  | 7 +++++++
 volk/apps/volk_profile.cc | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt
index f27bdc126..14291e5e3 100644
--- a/volk/apps/CMakeLists.txt
+++ b/volk/apps/CMakeLists.txt
@@ -42,4 +42,11 @@ add_executable(volk_profile
 
 target_link_libraries(volk_profile volk ${Boost_LIBRARIES})
 
+install(
+    PROGRAMS
+    ${CMAKE_BINARY_DIR}/apps/volk_profile
+    DESTINATION ${GR_RUNTIME_DIR}
+    COMPONENT "volk"
+)
+
 endif(Boost_FOUND AND UNIX)
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 10a699872..c198ec42d 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -43,13 +43,18 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 204600, 5000, &results);
     VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results);
     VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32fc_index_max_16u_a, 3, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 204600, 100, &results);
     VOLK_PROFILE(volk_32fc_magnitude_32f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_magnitude_32f_u, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_magnitude_squared_32f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_magnitude_squared_32f_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results);
@@ -103,6 +108,7 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results);
     VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results);
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results);
 
     char path[256];
-- 
cgit 


From b3e538493f969526c704a3eb1b3c8d8c61be78f6 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 16:38:36 -0500
Subject: volk: adding new functions to Makefile.

---
 volk/include/volk/Makefile.am | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index c5b99c41b..eea006bf2 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -56,6 +56,7 @@ volkinclude_HEADERS = \
 	volk_32f_s32f_multiply_32f_a.h \
 	volk_32fc_32f_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_a.h \
+	volk_32fc_s32fc_multiply_32fc_u.h \
 	volk_32fc_s32f_power_32fc_a.h \
 	volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \
 	volk_32fc_s32f_atan2_32f_a.h \
@@ -76,6 +77,7 @@ volkinclude_HEADERS = \
 	volk_32fc_magnitude_squared_32f_a.h \
 	volk_32fc_magnitude_squared_32f_u.h \
 	volk_32fc_x2_multiply_32fc_a.h \
+	volk_32fc_x2_multiply_32fc_u.h \
 	volk_32f_s32f_convert_16i_a.h \
 	volk_32f_s32f_convert_16i_u.h \
 	volk_32f_s32f_convert_32i_a.h \
-- 
cgit 


From a3b19015cb1c896aef19a7817458878337b3f5e3 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 16:38:56 -0500
Subject: core: more fixes when moving files from gengen to general.

---
 gnuradio-core/src/guile/tests/gengen_ctors.test   | 6 ------
 gnuradio-core/src/lib/general/general_generated.i | 4 ----
 2 files changed, 10 deletions(-)

diff --git a/gnuradio-core/src/guile/tests/gengen_ctors.test b/gnuradio-core/src/guile/tests/gengen_ctors.test
index 6e1213c63..6bac05394 100644
--- a/gnuradio-core/src/guile/tests/gengen_ctors.test
+++ b/gnuradio-core/src/guile/tests/gengen_ctors.test
@@ -161,12 +161,6 @@
 ;;; ./gengen/gr_moving_average_ss.h
 (pass-if (true? (gr:moving-average-ss 1 0 4096)))
 
-;;; ./gengen/gr_multiply_cc.h
-(pass-if (true? (gr:multiply-cc 1)))
-
-;;; ./gengen/gr_multiply_const_cc.h
-(pass-if (true? (gr:multiply-const-cc 1)))
-
 ;;; ./gengen/gr_multiply_const_ff.h
 (pass-if (true? (gr:multiply-const-ff 1)))
 
diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i
index a41f30a3d..847860f53 100644
--- a/gnuradio-core/src/lib/general/general_generated.i
+++ b/gnuradio-core/src/lib/general/general_generated.i
@@ -29,8 +29,6 @@
 #include <gr_divide_ff.h>
 #include <gr_divide_ii.h>
 #include <gr_divide_ss.h>
-#include <gr_multiply_cc.h>
-#include <gr_multiply_const_cc.h>
 #include <gr_multiply_const_ff.h>
 #include <gr_multiply_const_ii.h>
 #include <gr_multiply_const_ss.h>
@@ -106,8 +104,6 @@
 %include <gr_divide_ff.i>
 %include <gr_divide_ii.i>
 %include <gr_divide_ss.i>
-%include <gr_multiply_cc.i>
-%include <gr_multiply_const_cc.i>
 %include <gr_multiply_const_ff.i>
 %include <gr_multiply_const_ii.i>
 %include <gr_multiply_const_ss.i>
-- 
cgit 


From ae663decab658be25ac01072fa2f5c8454bd6167 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 2 Feb 2012 17:26:39 -0500
Subject: core: moving multiply_const_ff from gengen to general to take
 advantage of volk.

Also adds SSE and AVX and unaligned Volk versions for this.
---
 gnuradio-core/src/lib/general/CMakeLists.txt       |  1 +
 gnuradio-core/src/lib/general/Makefile.am          |  9 +++
 gnuradio-core/src/lib/general/general.i            |  2 +
 gnuradio-core/src/lib/general/general_generated.i  |  1 -
 gnuradio-core/src/lib/general/gr_int_to_float.cc   |  7 +-
 .../src/lib/general/gr_multiply_const_cc.cc        |  2 -
 .../src/lib/general/gr_multiply_const_ff.cc        | 80 ++++++++++++++++++++++
 .../src/lib/general/gr_multiply_const_ff.h         | 60 ++++++++++++++++
 .../src/lib/general/gr_multiply_const_ff.i         | 33 +++++++++
 gnuradio-core/src/lib/gengen/CMakeLists.txt        |  2 +-
 gnuradio-core/src/lib/gengen/generate_common.py    |  2 +-
 .../src/python/gnuradio/gr/qa_add_and_friends.py   |  6 ++
 volk/apps/volk_profile.cc                          |  1 +
 volk/include/volk/volk_32f_s32f_multiply_32f_a.h   | 75 ++++++++++++++++++++
 .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h | 12 ++--
 15 files changed, 276 insertions(+), 17 deletions(-)
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_ff.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_ff.h
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_const_ff.i

diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 6afdfe27c..393f732b7 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -232,6 +232,7 @@ set(gr_core_general_triple_threats
     gr_map_bb
     gr_multiply_cc
     gr_multiply_const_cc
+    gr_multiply_const_ff
     gr_nlog10_ff
     gr_nop
     gr_null_sink
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index 65b5a729e..5a6f5bf46 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -94,6 +94,9 @@ libgeneral_la_SOURCES = 		\
 	gr_lfsr_32k_source_s.cc		\
 	gr_map_bb.cc			\
 	gr_misc.cc			\
+	gr_multiply_cc.cc		\
+	gr_multiply_const_cc.cc		\
+	gr_multiply_const_ff.cc		\
 	gr_nlog10_ff.cc			\
 	gr_nop.cc			\
 	gr_null_sink.cc			\
@@ -249,6 +252,9 @@ grinclude_HEADERS = 			\
 	gr_map_bb.h			\
 	gr_math.h			\
 	gr_misc.h			\
+	gr_multiply_cc.h		\
+	gr_multiply_const_cc.h		\
+	gr_multiply_const_ff.h		\
 	gr_nco.h			\
 	gr_nlog10_ff.h			\
 	gr_nop.h			\
@@ -408,6 +414,9 @@ swiginclude_HEADERS =			\
 	gr_kludge_copy.i		\
 	gr_lfsr_32k_source_s.i		\
 	gr_map_bb.i			\
+	gr_multiply_cc.i		\
+	gr_multiply_const_cc.i		\
+	gr_multiply_const_ff.i		\
 	gr_nlog10_ff.i			\
 	gr_nop.i			\
 	gr_null_sink.i			\
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index 7de13258e..ac3fef84c 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -108,6 +108,7 @@
 #include <gr_map_bb.h>
 #include <gr_multiply_cc.h>
 #include <gr_multiply_const_cc.h>
+#include <gr_multiply_const_ff.h>
 #include <gr_feval.h>
 #include <gr_pwr_squelch_cc.h>
 #include <gr_pwr_squelch_ff.h>
@@ -226,6 +227,7 @@
 %include "gr_map_bb.i"
 %include "gr_multiply_cc.i"
 %include "gr_multiply_const_cc.i"
+%include "gr_multiply_const_ff.i"
 %include "gr_feval.i"
 %include "gr_pwr_squelch_cc.i"
 %include "gr_pwr_squelch_ff.i"
diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i
index 847860f53..52e09f89b 100644
--- a/gnuradio-core/src/lib/general/general_generated.i
+++ b/gnuradio-core/src/lib/general/general_generated.i
@@ -29,7 +29,6 @@
 #include <gr_divide_ff.h>
 #include <gr_divide_ii.h>
 #include <gr_divide_ss.h>
-#include <gr_multiply_const_ff.h>
 #include <gr_multiply_const_ii.h>
 #include <gr_multiply_const_ss.h>
 #include <gr_multiply_const_vcc.h>
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.cc b/gnuradio-core/src/lib/general/gr_int_to_float.cc
index dca0e1b89..7ec15b1a8 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.cc
@@ -26,7 +26,6 @@
 
 #include <gr_int_to_float.h>
 #include <gr_io_signature.h>
-#include <gri_int_to_float.h>
 #include <volk/volk.h>
 
 gr_int_to_float_sptr
@@ -54,17 +53,13 @@ gr_int_to_float::work (int noutput_items,
   const int32_t *in = (const int32_t *) input_items[0];
   float *out = (float *) output_items[0];
   
-#if 1
   if(is_unaligned()) {
     volk_32i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
   }
   else {
     volk_32i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
   }
-#else
-  gri_int_to_float(in, out, d_vlen*noutput_items);
-#endif
-  
+
   return noutput_items;
 }
 
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
index e301ae8eb..59521f54a 100644
--- a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
@@ -57,8 +57,6 @@ gr_multiply_const_cc::set_k(gr_complex k)
   d_k = k;
 }
 
-#include <cstdio>
-
 int
 gr_multiply_const_cc::work (int noutput_items,
 			    gr_vector_const_void_star &input_items,
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc b/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc
new file mode 100644
index 000000000..8354cb27b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc
@@ -0,0 +1,80 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_const_ff.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_const_ff_sptr
+gr_make_multiply_const_ff (float k, size_t vlen)
+{
+  return gnuradio::get_initial_sptr(new gr_multiply_const_ff (k, vlen)); // factory: the constructor is private, so blocks are built here
+}
+
+gr_multiply_const_ff::gr_multiply_const_ff (float k, size_t vlen)
+  : gr_sync_block ("gr_multiply_const_ff",
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+    d_k(k), d_vlen(vlen)
+{
+  // Alignment counted in floats; clamp to 1 since the division can truncate to 0 and set_alignment() throws for values < 1.
+  const int alignment_multiple = volk_get_alignment() / sizeof(float);
+  set_alignment(alignment_multiple < 1 ? 1 : alignment_multiple);
+}
+
+float
+gr_multiply_const_ff::k() const
+{
+  return d_k;  // the constant that work() passes to the Volk multiply kernels
+}
+
+void
+gr_multiply_const_ff::set_k(float k)
+{
+  d_k = k;  // work() reads d_k on every call, so subsequent work() calls use the new value
+}
+
+int
+gr_multiply_const_ff::work (int noutput_items,
+			    gr_vector_const_void_star &input_items,
+			    gr_vector_void_star &output_items)
+{
+  const float *in = (const float *) input_items[0];
+  float *out = (float *) output_items[0];
+  int noi = d_vlen*noutput_items;  // total floats to process: floats per item times number of items
+  // Multiply noi floats by d_k; the unaligned kernel is used only when is_unaligned() is set.
+  if(is_unaligned()) {
+    volk_32f_s32f_multiply_32f_u(out, in, d_k, noi);
+  }
+  else {
+    volk_32f_s32f_multiply_32f_a(out, in, d_k, noi);
+  }
+
+  return noutput_items;  // report the item count that was passed in
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.h b/gnuradio-core/src/lib/general/gr_multiply_const_ff.h
new file mode 100644
index 000000000..ef42a92f4
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CONST_FF_H
+#define INCLUDED_GR_MULTIPLY_CONST_FF_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_const_ff;
+typedef boost::shared_ptr<gr_multiply_const_ff> gr_multiply_const_ff_sptr;
+
+GR_CORE_API gr_multiply_const_ff_sptr
+gr_make_multiply_const_ff (float k, size_t vlen=1);
+
+/*!
+ * \brief Multiply stream of float values with a constant \p k
+ * \ingroup math_blk
+ * \details Each item is a vector of \p vlen floats; every float is scaled by \p k.
+ */
+class GR_CORE_API gr_multiply_const_ff : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_multiply_const_ff_sptr
+    gr_make_multiply_const_ff (float k, size_t vlen);
+  gr_multiply_const_ff (float k, size_t vlen);  // private: construct through gr_make_multiply_const_ff
+
+  float d_k;      // constant each input float is multiplied by
+  size_t d_vlen;  // number of floats per stream item
+
+ public:
+  float k() const;      // returns the current multiplier
+  void set_k(float k);  // replaces the multiplier
+
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CONST_FF_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.i b/gnuradio-core/src/lib/general/gr_multiply_const_ff.i
new file mode 100644
index 000000000..0fd3b1225
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.i
@@ -0,0 +1,33 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_const_ff)
+
+gr_multiply_const_ff_sptr
+gr_make_multiply_const_ff (float k, size_t vlen=1);
+
+class gr_multiply_const_ff : public gr_sync_block
+{
+public:
+  float k() const;      // returns the current multiplier
+  void set_k(float k);  // replaces the multiplier
+};
diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt
index 53dc9ce15..83213dc04 100644
--- a/gnuradio-core/src/lib/gengen/CMakeLists.txt
+++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt
@@ -86,7 +86,7 @@ expand_h_cc_i(gr_noise_source_X      s i f c)
 expand_h_cc_i(gr_sig_source_X        s i f c)
 
 expand_h_cc_i(gr_add_const_XX           ss ii ff cc sf)
-expand_h_cc_i(gr_multiply_const_XX      ss ii ff)
+expand_h_cc_i(gr_multiply_const_XX      ss ii)
 expand_h_cc_i(gr_add_XX                 ss ii ff cc)
 expand_h_cc_i(gr_sub_XX                 ss ii ff cc)
 expand_h_cc_i(gr_multiply_XX            ss ii ff)
diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py
index 1c2c064c1..5caa7098b 100755
--- a/gnuradio-core/src/lib/gengen/generate_common.py
+++ b/gnuradio-core/src/lib/gengen/generate_common.py
@@ -66,7 +66,7 @@ others = (
     ('gr_max_XX',                   ('ff','ii','ss')),
     ('gr_peak_detector_XX',         ('fb','ib','sb')),
     ('gr_multiply_XX',              ('ss','ii','ff')),
-    ('gr_multiply_const_XX',        ('ss','ii','ff'))
+    ('gr_multiply_const_XX',        ('ss','ii'))
     )
 
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
index c1d8dafd1..aad57e580 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
@@ -78,6 +78,12 @@ class test_add_and_friends (gr_unittest.TestCase):
         op = gr.multiply_const_ii (5)
         self.help_ii ((src_data,), expected_result, op)
 
+    def test_mult_const_ff (self):
+        src_data = (-1, 0, 1, 2, 3)
+        expected_result = (-5, 0, 5, 10, 15)
+        op = gr.multiply_const_cc (5)
+        self.help_cc ((src_data,), expected_result, op)
+
     def test_mult_const_cc (self):
         src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j)
         expected_result = (-5-5j, 0+0j, 5+5j, 10+10j, 15+15j)
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index c198ec42d..7da8651e9 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -110,6 +110,7 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 204600, 1000, &results);
 
     char path[256];
     get_config_path(path);
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
index 37223dc81..d1c6f3f65 100644
--- a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
@@ -4,6 +4,81 @@
 #include <inttypes.h>
 #include <stdio.h>
 
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+  \brief Multiplies every float in aVector by scalar (SSE, aligned loads and stores)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied by the scalar
+  \param scalar The scalar value each input point is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4; // number of full 4-float iterations
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+
+    __m128 aVal, bVal, cVal;
+    bVal = _mm_set_ps1(scalar); // broadcast the scalar into all four lanes
+    for(;number < quarterPoints; number++){
+      
+      aVal = _mm_load_ps(aPtr); // aligned load: aPtr must be 16-byte aligned
+      
+      cVal = _mm_mul_ps(aVal, bVal); 
+      
+      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 4;
+      cPtr += 4;
+    }
+
+    number = quarterPoints * 4; // first point not covered by the SIMD loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * scalar; // scalar tail for the remaining points
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Multiplies every float in aVector by scalar (AVX, aligned loads and stores)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied by the scalar
+  \param scalar The scalar value each input point is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int eighthPoints = num_points / 8; // number of full 8-float iterations
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+
+    __m256 aVal, bVal, cVal;
+    bVal = _mm256_set1_ps(scalar); // broadcast the scalar into all eight lanes
+    for(;number < eighthPoints; number++){
+      
+      aVal = _mm256_load_ps(aPtr); // aligned load: aPtr must be 32-byte aligned
+      
+      cVal = _mm256_mul_ps(aVal, bVal); 
+      
+      _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 8;
+      cPtr += 8;
+    }
+
+    number = eighthPoints * 8; // first point not covered by the SIMD loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * scalar; // scalar tail for the remaining points
+    }
+}
+#endif /* LV_HAVE_AVX */
+
+
 #ifdef LV_HAVE_GENERIC
 /*!
   \brief Scalar float multiply
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
index a9dfcda19..450a89066 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -9,10 +9,10 @@
 #ifdef LV_HAVE_SSE3
 #include <pmmintrin.h>
   /*!
-    \brief Multiplies the two input complex vectors and stores their results in the third vector
+  \brief Multiplies the input vector by a scalar and stores the results in the third vector
     \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
+    \param aVector The vector to be multiplied
+    \param scalar The complex scalar to multiply aVector
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
 static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
@@ -53,10 +53,10 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons
 
 #ifdef LV_HAVE_GENERIC
   /*!
-    \brief Multiplies the two input complex vectors and stores their results in the third vector
+  \brief Multiplies the input vector by a scalar and stores the results in the third vector
     \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
+    \param aVector The vector to be multiplied
+    \param scalar The complex scalar to multiply aVector
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
 static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-- 
cgit 


From 3a21ccb8cdef50daa52f5d26293df3a03c821c99 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 4 Feb 2012 11:03:40 -0500
Subject: sched: some added protections and checks for the alignment states.

---
 gnuradio-core/src/lib/runtime/gr_block_executor.cc | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index 11c6639b3..86289695a 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -183,7 +183,8 @@ gr_block_executor::run_one_iteration()
   int			noutput_items;
   int			max_items_avail;
   int                   max_noutput_items = d_max_noutput_items;
-  int                   new_alignment;
+  int                   new_alignment=0;
+  int                   alignment_state=-1;
 
   gr_block		*m = d_block.get();
   gr_block_detail	*d = m->detail().get();
@@ -338,6 +339,7 @@ gr_block_executor::run_one_iteration()
 	else {
 	  new_alignment = m->unaligned() - noutput_items;
 	}
+	alignment_state = 0;
       }
       else if(noutput_items < m->alignment()) {
 	// if we don't have enough for an aligned call, keep track of
@@ -345,11 +347,13 @@ gr_block_executor::run_one_iteration()
 	new_alignment = m->alignment() - noutput_items;
 	m->set_unaligned(new_alignment);
 	m->set_is_unaligned(true);
+	alignment_state = 1;
       }
       else {
 	// enough to round down to the nearest alignment and process.
 	noutput_items = round_down(noutput_items, m->alignment());
 	m->set_is_unaligned(false);
+	alignment_state = 2;
       }
     }
 
@@ -391,6 +395,12 @@ gr_block_executor::run_one_iteration()
 	goto were_done;
       }
 
+      // If we were made unaligned in this round but return here without
+      // processing, reset the unalignment claim before next entry.
+      if(alignment_state == 1) {
+	m->set_unaligned(0);
+	m->set_is_unaligned(false);
+      }
       return BLKD_IN;
     }
 
-- 
cgit 


From f028a198cb47e0486ad41a29bd3b3dcf0663d766 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 4 Feb 2012 11:04:30 -0500
Subject: volk: new unaligned versions of float multipliers.

---
 volk/apps/volk_profile.cc                          |   1 +
 volk/include/volk/Makefile.am                      |   2 +
 volk/include/volk/volk_32f_s32f_multiply_32f_u.h   | 102 ++++++++++++++++++++
 volk/include/volk/volk_32f_x2_multiply_32f_u.h     | 106 +++++++++++++++++++++
 .../include/volk/volk_32fc_s32fc_multiply_32fc_u.h |  24 ++---
 volk/lib/testqa.cc                                 |  10 +-
 6 files changed, 231 insertions(+), 14 deletions(-)
 create mode 100644 volk/include/volk/volk_32f_s32f_multiply_32f_u.h
 create mode 100644 volk/include/volk/volk_32f_x2_multiply_32f_u.h

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 7da8651e9..f5f730df1 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -77,6 +77,7 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32f_x2_max_32f_a, 1e-4, 0, 204600, 2000, &results);
     VOLK_PROFILE(volk_32f_x2_min_32f_a, 1e-4, 0, 204600, 2000, &results);
     VOLK_PROFILE(volk_32f_x2_multiply_32f_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_x2_multiply_32f_u, 1e-4, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_normalize_a, 1e-4, 100, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_power_32f_a, 1e-4, 4, 204600, 100, &results);
     VOLK_PROFILE(volk_32f_sqrt_32f_a, 1e-4, 0, 204600, 100, &results);
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index eea006bf2..312ff2d5a 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -54,6 +54,7 @@ volkinclude_HEADERS = \
 	volk_32f_accumulator_s32f_a.h \
 	volk_32f_x2_add_32f_a.h \
 	volk_32f_s32f_multiply_32f_a.h \
+	volk_32f_s32f_multiply_32f_u.h \
 	volk_32fc_32f_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_u.h \
@@ -100,6 +101,7 @@ volkinclude_HEADERS = \
 	volk_32f_x2_max_32f_a.h \
 	volk_32f_x2_min_32f_a.h \
 	volk_32f_x2_multiply_32f_a.h \
+	volk_32f_x2_multiply_32f_u.h \
 	volk_32f_s32f_normalize_a.h \
 	volk_32f_s32f_power_32f_a.h \
 	volk_32f_sqrt_32f_a.h \
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_u.h b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
new file mode 100644
index 000000000..0e700060f
--- /dev/null
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
@@ -0,0 +1,102 @@
+#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
+#define INCLUDED_volk_32f_s32f_multiply_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+  \brief Multiplies every float in aVector by scalar (SSE, unaligned loads and stores)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied by the scalar
+  \param scalar The scalar value each input point is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4; // number of full 4-float iterations
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+
+    __m128 aVal, bVal, cVal;
+    bVal = _mm_set_ps1(scalar); // broadcast the scalar into all four lanes
+    for(;number < quarterPoints; number++){
+      
+      aVal = _mm_loadu_ps(aPtr); // unaligned load: no alignment requirement on aPtr
+      
+      cVal = _mm_mul_ps(aVal, bVal); 
+      
+      _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 4;
+      cPtr += 4;
+    }
+
+    number = quarterPoints * 4; // first point not covered by the SIMD loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * scalar; // scalar tail for the remaining points
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Multiplies every float in aVector by scalar (AVX, unaligned loads and stores)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied by the scalar
+  \param scalar The scalar value each input point is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int eighthPoints = num_points / 8; // number of full 8-float iterations
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+
+    __m256 aVal, bVal, cVal;
+    bVal = _mm256_set1_ps(scalar); // broadcast the scalar into all eight lanes
+    for(;number < eighthPoints; number++){
+      
+      aVal = _mm256_loadu_ps(aPtr); // unaligned load: no alignment requirement on aPtr
+      
+      cVal = _mm256_mul_ps(aVal, bVal); 
+      
+      _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 8;
+      cPtr += 8;
+    }
+
+    number = eighthPoints * 8; // first point not covered by the SIMD loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * scalar; // scalar tail for the remaining points
+    }
+}
+#endif /* LV_HAVE_AVX */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Multiplies every float in aVector by scalar (plain C loop, no SIMD intrinsics)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied by the scalar
+  \param scalar The scalar value each input point is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_u_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+  unsigned int number = 0;
+  const float* inputPtr = aVector;
+  float* outputPtr = cVector;
+  for(number = 0; number < num_points; number++){
+    *outputPtr = (*inputPtr) * scalar; // one product per input point
+    inputPtr++;
+    outputPtr++;
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_u.h b/volk/include/volk/volk_32f_x2_multiply_32f_u.h
new file mode 100644
index 000000000..6c3ce5d83
--- /dev/null
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_u.h
@@ -0,0 +1,106 @@
+#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
+#define INCLUDED_volk_32f_x2_multiply_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+  \brief Multiplies the two input vectors and stores their results in the third vector (SSE, unaligned loads and stores)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be multiplied
+  \param bVector One of the vectors to be multiplied
+  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4; // number of full 4-float iterations
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+    const float* bPtr=  bVector;
+
+    __m128 aVal, bVal, cVal;
+    for(;number < quarterPoints; number++){
+      
+      aVal = _mm_loadu_ps(aPtr); // unaligned loads: no alignment requirement on aPtr or bPtr
+      bVal = _mm_loadu_ps(bPtr);
+      
+      cVal = _mm_mul_ps(aVal, bVal); 
+      
+      _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 4;
+      bPtr += 4;
+      cPtr += 4;
+    }
+
+    number = quarterPoints * 4; // first point not covered by the SIMD loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * (*bPtr++); // scalar tail for the remaining points
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Multiplies the two input vectors and stores their results in the third vector (AVX, unaligned loads and stores)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be multiplied
+  \param bVector One of the vectors to be multiplied
+  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int eighthPoints = num_points / 8; // number of full 8-float iterations
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+    const float* bPtr=  bVector;
+
+    __m256 aVal, bVal, cVal;
+    for(;number < eighthPoints; number++){
+      
+      aVal = _mm256_loadu_ps(aPtr); // unaligned loads: no alignment requirement on aPtr or bPtr
+      bVal = _mm256_loadu_ps(bPtr);
+      
+      cVal = _mm256_mul_ps(aVal, bVal); 
+      
+      _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 8;
+      bPtr += 8;
+      cPtr += 8;
+    }
+
+    number = eighthPoints * 8; // first point not covered by the SIMD loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * (*bPtr++); // scalar tail for the remaining points
+    }
+}
+#endif /* LV_HAVE_AVX */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Multiplies the two input vectors and stores their results in the third vector (plain C loop, no SIMD intrinsics)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be multiplied
+  \param bVector One of the vectors to be multiplied
+  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+    const float* bPtr=  bVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = (*aPtr++) * (*bPtr++); // element-wise product, one point per iteration
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
index 450a89066..218c450f8 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -8,13 +8,13 @@
 
 #ifdef LV_HAVE_SSE3
 #include <pmmintrin.h>
-  /*!
+/*!
   \brief Multiplies the input vector by a scalar and stores the results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The vector to be multiplied
-    \param scalar The complex scalar to multiply aVector
-    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-  */
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied
+  \param scalar The complex scalar to multiply aVector
+  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+*/
 static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
   unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
@@ -52,13 +52,13 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons
 #endif /* LV_HAVE_SSE */
 
 #ifdef LV_HAVE_GENERIC
-  /*!
+/*!
   \brief Multiplies the input vector by a scalar and stores the results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The vector to be multiplied
-    \param scalar The complex scalar to multiply aVector
-    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-  */
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be multiplied
+  \param scalar The complex scalar to multiply aVector
+  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+*/
 static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
     lv_32fc_t* cPtr = cVector;
     const lv_32fc_t* aPtr = aVector;
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index fbd4bdea5..97624775e 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -37,7 +37,6 @@ VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 1);
 VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 3, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 1);
@@ -59,7 +58,6 @@ VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 1);
@@ -90,3 +88,11 @@ VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1);
 VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 20460, 1);
-- 
cgit 


From 47c390286d49e00498a3443a3dcb9f83d11c7ecc Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 4 Feb 2012 11:05:25 -0500
Subject: core: new multiply_const_ff and multiply_ff blocks done using Volk.

---
 gnuradio-core/src/lib/general/.gitignore           |  6 --
 gnuradio-core/src/lib/general/CMakeLists.txt       |  1 +
 gnuradio-core/src/lib/general/Makefile.am          |  3 +
 gnuradio-core/src/lib/general/general.i            |  2 +
 gnuradio-core/src/lib/general/general_generated.i  |  3 -
 gnuradio-core/src/lib/general/gr_multiply_ff.cc    | 69 ++++++++++++++++++++++
 gnuradio-core/src/lib/general/gr_multiply_ff.h     | 56 ++++++++++++++++++
 gnuradio-core/src/lib/general/gr_multiply_ff.i     | 32 ++++++++++
 gnuradio-core/src/lib/gengen/.gitignore            |  6 --
 gnuradio-core/src/lib/gengen/CMakeLists.txt        |  2 +-
 gnuradio-core/src/lib/gengen/generate_common.py    |  2 +-
 .../src/python/gnuradio/gr/qa_add_and_friends.py   |  8 +++
 12 files changed, 173 insertions(+), 17 deletions(-)
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_ff.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_ff.h
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_ff.i

diff --git a/gnuradio-core/src/lib/general/.gitignore b/gnuradio-core/src/lib/general/.gitignore
index 349651e0c..b04ffe4ae 100644
--- a/gnuradio-core/src/lib/general/.gitignore
+++ b/gnuradio-core/src/lib/general/.gitignore
@@ -158,18 +158,12 @@
 /gr_divide_ss.cc
 /gr_divide_ss.h
 /gr_divide_ss.i
-/gr_multiply_const_ff.cc
-/gr_multiply_const_ff.h
-/gr_multiply_const_ff.i
 /gr_multiply_const_ii.cc
 /gr_multiply_const_ii.h
 /gr_multiply_const_ii.i
 /gr_multiply_const_ss.cc
 /gr_multiply_const_ss.h
 /gr_multiply_const_ss.i
-/gr_multiply_ff.cc
-/gr_multiply_ff.h
-/gr_multiply_ff.i
 /gr_multiply_ii.cc
 /gr_multiply_ii.h
 /gr_multiply_ii.i
diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 393f732b7..301465361 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -231,6 +231,7 @@ set(gr_core_general_triple_threats
     gr_lfsr_32k_source_s
     gr_map_bb
     gr_multiply_cc
+    gr_multiply_ff
     gr_multiply_const_cc
     gr_multiply_const_ff
     gr_nlog10_ff
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index 5a6f5bf46..b452a5107 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -95,6 +95,7 @@ libgeneral_la_SOURCES = 		\
 	gr_map_bb.cc			\
 	gr_misc.cc			\
 	gr_multiply_cc.cc		\
+	gr_multiply_ff.cc		\
 	gr_multiply_const_cc.cc		\
 	gr_multiply_const_ff.cc		\
 	gr_nlog10_ff.cc			\
@@ -253,6 +254,7 @@ grinclude_HEADERS = 			\
 	gr_math.h			\
 	gr_misc.h			\
 	gr_multiply_cc.h		\
+	gr_multiply_ff.h		\
 	gr_multiply_const_cc.h		\
 	gr_multiply_const_ff.h		\
 	gr_nco.h			\
@@ -415,6 +417,7 @@ swiginclude_HEADERS =			\
 	gr_lfsr_32k_source_s.i		\
 	gr_map_bb.i			\
 	gr_multiply_cc.i		\
+	gr_multiply_ff.i		\
 	gr_multiply_const_cc.i		\
 	gr_multiply_const_ff.i		\
 	gr_nlog10_ff.i			\
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index ac3fef84c..8e1be02f9 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -107,6 +107,7 @@
 #include <gr_framer_sink_1.h>
 #include <gr_map_bb.h>
 #include <gr_multiply_cc.h>
+#include <gr_multiply_ff.h>
 #include <gr_multiply_const_cc.h>
 #include <gr_multiply_const_ff.h>
 #include <gr_feval.h>
@@ -226,6 +227,7 @@
 %include "gr_framer_sink_1.i"
 %include "gr_map_bb.i"
 %include "gr_multiply_cc.i"
+%include "gr_multiply_ff.i"
 %include "gr_multiply_const_cc.i"
 %include "gr_multiply_const_ff.i"
 %include "gr_feval.i"
diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i
index 52e09f89b..82f9a6006 100644
--- a/gnuradio-core/src/lib/general/general_generated.i
+++ b/gnuradio-core/src/lib/general/general_generated.i
@@ -35,7 +35,6 @@
 #include <gr_multiply_const_vff.h>
 #include <gr_multiply_const_vii.h>
 #include <gr_multiply_const_vss.h>
-#include <gr_multiply_ff.h>
 #include <gr_multiply_ii.h>
 #include <gr_multiply_ss.h>
 #include <gr_multiply_vcc.h>
@@ -103,14 +102,12 @@
 %include <gr_divide_ff.i>
 %include <gr_divide_ii.i>
 %include <gr_divide_ss.i>
-%include <gr_multiply_const_ff.i>
 %include <gr_multiply_const_ii.i>
 %include <gr_multiply_const_ss.i>
 %include <gr_multiply_const_vcc.i>
 %include <gr_multiply_const_vff.i>
 %include <gr_multiply_const_vii.i>
 %include <gr_multiply_const_vss.i>
-%include <gr_multiply_ff.i>
 %include <gr_multiply_ii.i>
 %include <gr_multiply_ss.i>
 %include <gr_multiply_vcc.i>
diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.cc b/gnuradio-core/src/lib/general/gr_multiply_ff.cc
new file mode 100644
index 000000000..a7d34ce51
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_ff.cc
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_ff.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_ff_sptr
+gr_make_multiply_ff (size_t vlen) // factory; vlen is the number of floats in one stream item
+{
+  return gnuradio::get_initial_sptr(new gr_multiply_ff (vlen)); // hand the new block to its shared pointer
+}
+
+gr_multiply_ff::gr_multiply_ff (size_t vlen)
+  : gr_sync_block ("gr_multiply_ff",
+		   gr_make_io_signature (1, -1, sizeof (float)*vlen), // inputs: min 1 stream; -1 presumably means no upper limit (confirm in gr_io_signature)
+		   gr_make_io_signature (1, 1, sizeof (float)*vlen)), // output: exactly 1 stream of vlen floats per item
+    d_vlen(vlen)
+{
+  const int alignment_multiple = 
+    volk_get_alignment() / sizeof(float); // alignment expressed in floats; assumes volk_get_alignment() returns bytes - TODO confirm
+  set_alignment(alignment_multiple); // pass the multiple on to gr_block::set_alignment()
+}
+
+int
+gr_multiply_ff::work (int noutput_items,
+		      gr_vector_const_void_star &input_items,
+		      gr_vector_void_star &output_items)
+{
+  float *out = (float *) output_items[0];
+  int noi = d_vlen*noutput_items; // total number of floats to process (vlen floats per item)
+  // Seed the output with input 0, then multiply each further input into it in place
+  memcpy(out, input_items[0], noi*sizeof(float));
+  if(is_unaligned()) { // block is flagged unaligned: use the unaligned (_u) Volk kernel
+    for(size_t i = 1; i < input_items.size(); i++)
+      volk_32f_x2_multiply_32f_u(out, out, (const float*)input_items[i], noi);
+  }
+  else { // otherwise use the aligned (_a) Volk kernel
+    for(size_t i = 1; i < input_items.size(); i++)
+      volk_32f_x2_multiply_32f_a(out, out, (const float*)input_items[i], noi);
+  }
+  return noutput_items; // every requested output item was produced
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.h b/gnuradio-core/src/lib/general/gr_multiply_ff.h
new file mode 100644
index 000000000..ae36cb1e0
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_ff.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_FF_H
+#define INCLUDED_GR_MULTIPLY_FF_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_ff;
+typedef boost::shared_ptr<gr_multiply_ff> gr_multiply_ff_sptr;
+
+GR_CORE_API gr_multiply_ff_sptr
+gr_make_multiply_ff (size_t vlen=1);
+
+/*!
+ * \brief Multiply streams of float values
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_ff : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_multiply_ff_sptr
+    gr_make_multiply_ff (size_t vlen);
+  gr_multiply_ff (size_t vlen); // private: instances are created through gr_make_multiply_ff()
+  // Number of floats in each stream item
+  size_t d_vlen;
+
+ public:
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_FF_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.i b/gnuradio-core/src/lib/general/gr_multiply_ff.i
new file mode 100644
index 000000000..0f06301f2
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_ff.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_ff)
+
+gr_multiply_ff_sptr
+gr_make_multiply_ff (size_t vlen=1);
+
+class gr_multiply_ff : public gr_sync_block
+{
+public:
+  // No members are declared here beyond what gr_sync_block provides
+};
diff --git a/gnuradio-core/src/lib/gengen/.gitignore b/gnuradio-core/src/lib/gengen/.gitignore
index 4422ae0dd..72c915cb8 100644
--- a/gnuradio-core/src/lib/gengen/.gitignore
+++ b/gnuradio-core/src/lib/gengen/.gitignore
@@ -202,9 +202,6 @@
 /gr_max_ss.cc
 /gr_max_ss.h
 /gr_max_ss.i
-/gr_multiply_const_ff.cc
-/gr_multiply_const_ff.h
-/gr_multiply_const_ff.i
 /gr_multiply_const_ii.cc
 /gr_multiply_const_ii.h
 /gr_multiply_const_ii.i
@@ -223,9 +220,6 @@
 /gr_multiply_const_vss.cc
 /gr_multiply_const_vss.h
 /gr_multiply_const_vss.i
-/gr_multiply_ff.cc
-/gr_multiply_ff.h
-/gr_multiply_ff.i
 /gr_multiply_ii.cc
 /gr_multiply_ii.h
 /gr_multiply_ii.i
diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt
index 83213dc04..c3c4a7a35 100644
--- a/gnuradio-core/src/lib/gengen/CMakeLists.txt
+++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt
@@ -89,7 +89,7 @@ expand_h_cc_i(gr_add_const_XX           ss ii ff cc sf)
 expand_h_cc_i(gr_multiply_const_XX      ss ii)
 expand_h_cc_i(gr_add_XX                 ss ii ff cc)
 expand_h_cc_i(gr_sub_XX                 ss ii ff cc)
-expand_h_cc_i(gr_multiply_XX            ss ii ff)
+expand_h_cc_i(gr_multiply_XX            ss ii)
 expand_h_cc_i(gr_divide_XX              ss ii ff cc)
 expand_h_cc_i(gr_mute_XX                ss ii ff cc)
 expand_h_cc_i(gr_add_const_vXX          ss ii ff cc)
diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py
index 5caa7098b..0c3d4579d 100755
--- a/gnuradio-core/src/lib/gengen/generate_common.py
+++ b/gnuradio-core/src/lib/gengen/generate_common.py
@@ -65,7 +65,7 @@ others = (
     ('gr_argmax_XX',                ('fs','is','ss')),
     ('gr_max_XX',                   ('ff','ii','ss')),
     ('gr_peak_detector_XX',         ('fb','ib','sb')),
-    ('gr_multiply_XX',              ('ss','ii','ff')),
+    ('gr_multiply_XX',              ('ss','ii')),
     ('gr_multiply_const_XX',        ('ss','ii'))
     )
 
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
index aad57e580..e3b20c3c3 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
@@ -112,6 +112,14 @@ class test_add_and_friends (gr_unittest.TestCase):
         self.help_ii ((src1_data, src2_data),
                       expected_result, op)
 
+    def test_mult_ff (self):
+        src1_data = (1,  2, 3, 4, 5)
+        src2_data = (8, -3, 4, 8, 2)
+        expected_result = (8, -6, 12, 32, 10)  # element-wise product of src1_data and src2_data
+        op = gr.multiply_ff ()
+        self.help_ff ((src1_data, src2_data),
+                      expected_result, op)
+
     def test_mult_cc (self):
         src1_data = (1+1j,  2+2j, 3+3j, 4+4j, 5+5j)
         src2_data = (8, -3, 4, 8, 2)
-- 
cgit 


From 298623615b249de459cd12b5507dab921fc3210b Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 6 Feb 2012 10:31:28 -0500
Subject: volk: added unaligned version of adding 2 vectors.

---
 volk/apps/volk_profile.cc                 |  1 +
 volk/include/volk/Makefile.am             |  1 +
 volk/include/volk/volk_32f_x2_add_32f_u.h | 66 +++++++++++++++++++++++++++++++
 volk/lib/testqa.cc                        |  1 +
 4 files changed, 69 insertions(+)
 create mode 100644 volk/include/volk/volk_32f_x2_add_32f_u.h

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index f5f730df1..712c32bce 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -34,6 +34,7 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_16u_byteswap_a, 0, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_accumulator_s32f_a, 1e-4, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_x2_add_32f_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_x2_add_32f_u, 1e-4, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 204600, 50, &results);
     VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 204600, 1000, &results);
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index 312ff2d5a..20864efbe 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -53,6 +53,7 @@ volkinclude_HEADERS = \
 	volk_16u_byteswap_a.h \
 	volk_32f_accumulator_s32f_a.h \
 	volk_32f_x2_add_32f_a.h \
+	volk_32f_x2_add_32f_u.h \
 	volk_32f_s32f_multiply_32f_a.h \
 	volk_32f_s32f_multiply_32f_u.h \
 	volk_32fc_32f_multiply_32fc_a.h \
diff --git a/volk/include/volk/volk_32f_x2_add_32f_u.h b/volk/include/volk/volk_32f_x2_add_32f_u.h
new file mode 100644
index 000000000..e360a7958
--- /dev/null
+++ b/volk/include/volk/volk_32f_x2_add_32f_u.h
@@ -0,0 +1,66 @@
+#ifndef INCLUDED_volk_32f_x2_add_32f_u_H
+#define INCLUDED_volk_32f_x2_add_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+  \brief Adds the two input vectors and stores their results in the third vector
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be added
+  \param bVector One of the vectors to be added
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4; // number of complete 4-float groups
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+    const float* bPtr=  bVector;
+
+    __m128 aVal, bVal, cVal;
+    for(;number < quarterPoints; number++){
+      // Unaligned loads: the input pointers need not be 16-byte aligned
+      aVal = _mm_loadu_ps(aPtr); 
+      bVal = _mm_loadu_ps(bPtr);
+      
+      cVal = _mm_add_ps(aVal, bVal); // element-wise sum of the 4 float pairs
+      
+      _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 4;
+      bPtr += 4;
+      cPtr += 4;
+    }
+    // Scalar tail: add the remaining (num_points % 4) values one at a time
+    number = quarterPoints * 4;
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) + (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Adds the two input vectors and stores their results in the third vector
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be added
+  \param bVector One of the vectors to be added
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_32f_x2_add_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+    const float* bPtr=  bVector;
+    unsigned int number = 0;
+    // Portable scalar loop: one sum per element, no SIMD intrinsics
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = (*aPtr++) + (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 97624775e..b00ea0b64 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -22,6 +22,7 @@ VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_add_32f_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1);
-- 
cgit 


From fb2a84add9706a046b4761021707d6bb97496a2e Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 6 Feb 2012 10:32:18 -0500
Subject: core: added volk version of add_ff.

---
 gnuradio-core/src/lib/general/.gitignore          |  3 -
 gnuradio-core/src/lib/general/CMakeLists.txt      |  1 +
 gnuradio-core/src/lib/general/Makefile.am         |  3 +
 gnuradio-core/src/lib/general/general.i           |  2 +
 gnuradio-core/src/lib/general/general_generated.i |  2 -
 gnuradio-core/src/lib/general/gr_add_ff.cc        | 70 +++++++++++++++++++++++
 gnuradio-core/src/lib/general/gr_add_ff.h         | 56 ++++++++++++++++++
 gnuradio-core/src/lib/general/gr_add_ff.i         | 32 +++++++++++
 gnuradio-core/src/lib/gengen/.gitignore           |  3 -
 gnuradio-core/src/lib/gengen/CMakeLists.txt       |  2 +-
 gnuradio-core/src/lib/gengen/generate_common.py   |  4 +-
 11 files changed, 167 insertions(+), 11 deletions(-)
 create mode 100644 gnuradio-core/src/lib/general/gr_add_ff.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_add_ff.h
 create mode 100644 gnuradio-core/src/lib/general/gr_add_ff.i

diff --git a/gnuradio-core/src/lib/general/.gitignore b/gnuradio-core/src/lib/general/.gitignore
index b04ffe4ae..795dc793c 100644
--- a/gnuradio-core/src/lib/general/.gitignore
+++ b/gnuradio-core/src/lib/general/.gitignore
@@ -125,9 +125,6 @@
 /gr_add_const_vss.cc
 /gr_add_const_vss.h
 /gr_add_const_vss.i
-/gr_add_ff.cc
-/gr_add_ff.h
-/gr_add_ff.i
 /gr_add_ii.cc
 /gr_add_ii.h
 /gr_add_ii.i
diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 301465361..ab35aa66a 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -178,6 +178,7 @@ endif(ENABLE_PYTHON)
 ########################################################################
 set(gr_core_general_triple_threats
     complex_vec_test
+    gr_add_ff
     gr_additive_scrambler_bb
     gr_agc_cc
     gr_agc_ff
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index b452a5107..89152d663 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -37,6 +37,7 @@ EXTRA_DIST += \
 
 libgeneral_la_SOURCES = 		\
 	complex_vec_test.cc		\
+	gr_add_ff.cc			\
 	gr_additive_scrambler_bb.cc	\
 	gr_agc_cc.cc                	\
 	gr_agc_ff.cc                	\
@@ -192,6 +193,7 @@ libgeneral_qa_la_SOURCES = 		\
 grinclude_HEADERS = 			\
 	gr_core_api.h			\
 	complex_vec_test.h		\
+	gr_add_ff.h			\
 	gr_additive_scrambler_bb.h	\
 	gr_agc_cc.h                 	\
 	gr_agc_ff.h                 	\
@@ -364,6 +366,7 @@ noinst_HEADERS = 			\
 swiginclude_HEADERS =			\
 	complex_vec_test.i		\
 	general.i			\
+	gr_add_ff.i			\
 	gr_additive_scrambler_bb.i	\
 	gr_agc_cc.i                 	\
 	gr_agc_ff.i                 	\
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index 8e1be02f9..c384ecfbb 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -140,6 +140,7 @@
 #include <gr_burst_tagger.h>
 #include <gr_cpm.h>
 #include <gr_correlate_access_code_tag_bb.h>
+#include <gr_add_ff.h>
 %}
 
 %include "gri_control_loop.i"
@@ -260,3 +261,4 @@
 %include "gr_burst_tagger.i"
 %include "gr_cpm.i"
 %include "gr_correlate_access_code_tag_bb.i"
+%include "gr_add_ff.i"
diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i
index 82f9a6006..e12f2b0ec 100644
--- a/gnuradio-core/src/lib/general/general_generated.i
+++ b/gnuradio-core/src/lib/general/general_generated.i
@@ -12,7 +12,6 @@
 #include <gr_add_const_vff.h>
 #include <gr_add_const_vii.h>
 #include <gr_add_const_vss.h>
-#include <gr_add_ff.h>
 #include <gr_add_ii.h>
 #include <gr_add_ss.h>
 #include <gr_add_vcc.h>
@@ -85,7 +84,6 @@
 %include <gr_add_const_vff.i>
 %include <gr_add_const_vii.i>
 %include <gr_add_const_vss.i>
-%include <gr_add_ff.i>
 %include <gr_add_ii.i>
 %include <gr_add_ss.i>
 %include <gr_add_vcc.i>
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.cc b/gnuradio-core/src/lib/general/gr_add_ff.cc
new file mode 100644
index 000000000..a5db5ec5c
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_add_ff.cc
@@ -0,0 +1,70 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_add_ff.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_add_ff_sptr
+gr_make_add_ff(size_t vlen) // factory; vlen is the number of floats in one stream item
+{
+  return gnuradio::get_initial_sptr(new gr_add_ff(vlen)); // hand the new block to its shared pointer
+}
+
+gr_add_ff::gr_add_ff (size_t vlen)
+  : gr_sync_block("add_ff",
+		  gr_make_io_signature (1, -1, sizeof(float)*vlen), // inputs: min 1 stream; -1 presumably means no upper limit (confirm in gr_io_signature)
+		  gr_make_io_signature (1,  1, sizeof(float)*vlen)), // output: exactly 1 stream of vlen floats per item
+    d_vlen (vlen)
+{
+  const int alignment_multiple = 
+    volk_get_alignment() / sizeof(float); // alignment expressed in floats; assumes volk_get_alignment() returns bytes - TODO confirm
+  set_alignment(alignment_multiple); // pass the multiple on to gr_block::set_alignment()
+}
+
+int
+gr_add_ff::work(int noutput_items,
+		gr_vector_const_void_star &input_items,
+		gr_vector_void_star &output_items)
+{
+  float *out = (float *) output_items[0];
+  int noi = d_vlen*noutput_items; // total number of floats to process (vlen floats per item)
+  // Seed the output with input 0, then add each further input into it in place
+  memcpy(out, input_items[0], noi*sizeof(float));
+  for(size_t i = 1; i < input_items.size(); i++)
+    volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi); // unaligned (_u) kernel is used unconditionally
+  /* Disabled: selection between the _u and _a kernels based on is_unaligned(), kept for reference.
+  if(is_unaligned()) {
+    for(size_t i = 1; i < input_items.size(); i++)
+      volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi);
+  }
+  else {
+    for(size_t i = 1; i < input_items.size(); i++)
+      volk_32f_x2_add_32f_a(out, out, (const float*)input_items[i], noi);
+  }
+  */
+  return noutput_items; // every requested output item was produced
+}
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.h b/gnuradio-core/src/lib/general/gr_add_ff.h
new file mode 100644
index 000000000..6421f8da2
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_add_ff.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_ADD_FF_H
+#define INCLUDED_GR_ADD_FF_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_add_ff;
+typedef boost::shared_ptr<gr_add_ff> gr_add_ff_sptr;
+
+GR_CORE_API gr_add_ff_sptr
+gr_make_add_ff (size_t vlen=1);
+
+/*!
+ * \brief Add streams of float values
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_add_ff : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_add_ff_sptr
+    gr_make_add_ff (size_t vlen);
+  gr_add_ff (size_t vlen); // private: instances are created through gr_make_add_ff()
+  // Number of floats in each stream item
+  size_t d_vlen;
+
+ public:
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_ADD_FF_H */
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.i b/gnuradio-core/src/lib/general/gr_add_ff.i
new file mode 100644
index 000000000..3c30640b1
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_add_ff.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,add_ff)
+
+gr_add_ff_sptr
+gr_make_add_ff (size_t vlen=1);
+
+class gr_add_ff : public gr_sync_block
+{
+public:
+  // No members are declared here beyond what gr_sync_block provides
+};
diff --git a/gnuradio-core/src/lib/gengen/.gitignore b/gnuradio-core/src/lib/gengen/.gitignore
index 72c915cb8..b91d2f197 100644
--- a/gnuradio-core/src/lib/gengen/.gitignore
+++ b/gnuradio-core/src/lib/gengen/.gitignore
@@ -124,9 +124,6 @@
 /gr_add_const_vss.cc
 /gr_add_const_vss.h
 /gr_add_const_vss.i
-/gr_add_ff.cc
-/gr_add_ff.h
-/gr_add_ff.i
 /gr_add_ii.cc
 /gr_add_ii.h
 /gr_add_ii.i
diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt
index c3c4a7a35..98b149e97 100644
--- a/gnuradio-core/src/lib/gengen/CMakeLists.txt
+++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt
@@ -87,7 +87,7 @@ expand_h_cc_i(gr_sig_source_X        s i f c)
 
 expand_h_cc_i(gr_add_const_XX           ss ii ff cc sf)
 expand_h_cc_i(gr_multiply_const_XX      ss ii)
-expand_h_cc_i(gr_add_XX                 ss ii ff cc)
+expand_h_cc_i(gr_add_XX                 ss ii cc)
 expand_h_cc_i(gr_sub_XX                 ss ii ff cc)
 expand_h_cc_i(gr_multiply_XX            ss ii)
 expand_h_cc_i(gr_divide_XX              ss ii ff cc)
diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py
index 0c3d4579d..616cc4b06 100755
--- a/gnuradio-core/src/lib/gengen/generate_common.py
+++ b/gnuradio-core/src/lib/gengen/generate_common.py
@@ -41,7 +41,6 @@ reg_signatures = ['ss', 'ii', 'ff', 'cc']
 
 reg_roots = [
     'gr_add_const_XX',
-    'gr_add_XX',
     'gr_sub_XX',
     'gr_divide_XX',
     'gr_mute_XX',
@@ -66,7 +65,8 @@ others = (
     ('gr_max_XX',                   ('ff','ii','ss')),
     ('gr_peak_detector_XX',         ('fb','ib','sb')),
     ('gr_multiply_XX',              ('ss','ii')),
-    ('gr_multiply_const_XX',        ('ss','ii'))
+    ('gr_multiply_const_XX',        ('ss','ii')),
+    ('gr_add_XX',                   ('ss','ii','cc'))
     )
 
 
-- 
cgit 


From 69210086c7ae98b93a63a1d810ee28b304a13520 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 6 Feb 2012 22:02:09 -0500
Subject: volk: added a 32fc multiply conjugate kernel.

---
 volk/apps/volk_profile.cc                          |  2 +
 volk/include/volk/Makefile.am                      |  2 +
 .../volk/volk_32fc_x2_multiply_conjugate_32fc_a.h  | 82 ++++++++++++++++++++++
 .../volk/volk_32fc_x2_multiply_conjugate_32fc_u.h  | 81 +++++++++++++++++++++
 volk/lib/testqa.cc                                 |  2 +
 5 files changed, 169 insertions(+)
 create mode 100644 volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
 create mode 100644 volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 712c32bce..6ba7f17bb 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -56,6 +56,8 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32fc_magnitude_squared_32f_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results);
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index 20864efbe..d071f18f2 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -59,6 +59,8 @@ volkinclude_HEADERS = \
 	volk_32fc_32f_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_u.h \
+	volk_32fc_x2_multiply_conjugate_32fc_a.h \
+	volk_32fc_x2_multiply_conjugate_32fc_u.h \
 	volk_32fc_s32f_power_32fc_a.h \
 	volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \
 	volk_32fc_s32f_atan2_32f_a.h \
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
new file mode 100644
index 000000000..70476a8c7
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
@@ -0,0 +1,82 @@
+#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
+#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+    \param cVector The vector where the results will be stored
+    \param aVector First vector to be multiplied
+    \param bVector Second vector that is conjugated before being multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+  unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x, y, yl, yh, z, tmp1, tmp2;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+    const lv_32fc_t* b = bVector;
+
+    __m128 conjugator = _mm_setr_ps(1, -1, 1, -1);
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+      y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+      // FIXME: replace with xor for a faster implementation
+      y = _mm_mul_ps(y, conjugator); // conjugate y
+      
+      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+      
+      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+      
+      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+      
+      tmp2 = _mm_mul_ps(x,yh); // tmp2 = -ai*ci,-ar*ci,-bi*di,-br*di (imaginary parts of y were negated by the conjugation above)
+      
+      z = _mm_addsub_ps(tmp1,tmp2); // addsub subtracts even lanes, adds odd lanes: ar*cr+ai*ci, ai*cr-ar*ci, br*dr+bi*di, bi*dr-br*di
+    
+      _mm_store_ps((float*)c,z); // Store the results back into the C container
+
+      a += 2;
+      b += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = (*a) * lv_conj(*b);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+    \param cVector The vector where the results will be stored
+    \param aVector First vector to be multiplied
+    \param bVector Second vector that is conjugated before being multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    const lv_32fc_t* bPtr=  bVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
new file mode 100644
index 000000000..fbaa29c17
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
@@ -0,0 +1,81 @@
+#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
+#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+    \param cVector The vector where the results will be stored
+    \param aVector First vector to be multiplied
+    \param bVector Second vector that is conjugated before being multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+  unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x, y, yl, yh, z, tmp1, tmp2;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+    const lv_32fc_t* b = bVector;
+
+    __m128 conjugator = _mm_set_ps(0, 0x80000000, 0, 0x80000000);
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+      y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+      y = _mm_xor_ps(y, conjugator); // conjugate y
+      
+      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+      
+      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+      
+      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+      
+      tmp2 = _mm_mul_ps(x,yh); // with y conjugated (imaginary parts negated): tmp2 = -ai*ci,-ar*ci,-bi*di,-br*di
+      
+      z = _mm_addsub_ps(tmp1,tmp2); // addsub subtracts even lanes, adds odd lanes: ar*cr+ai*ci, ai*cr-ar*ci, br*dr+bi*di, bi*dr-br*di
+    
+      _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+      a += 2;
+      b += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = (*a) * lv_conj(*b);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+    \param cVector The vector where the results will be stored
+    \param aVector First vector to be multiplied
+    \param bVector Second vector that is conjugated before being multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    const lv_32fc_t* bPtr=  bVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index b00ea0b64..fdd3d4853 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -91,6 +91,8 @@ VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1);
 VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1);
-- 
cgit 


From f34b496341ceb73baffee6f8bf84ed197ffeeaf0 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 6 Feb 2012 22:02:56 -0500
Subject: core: added Volk-optimized gr_multiply_conjugate_cc as one block with
 QA code.

---
 gnuradio-core/src/lib/general/CMakeLists.txt       |  1 +
 gnuradio-core/src/lib/general/Makefile.am          |  3 +
 gnuradio-core/src/lib/general/general.i            |  2 +
 gnuradio-core/src/lib/general/gr_add_ff.cc         |  4 --
 .../src/lib/general/gr_multiply_conjugate_cc.cc    | 69 ++++++++++++++++++++++
 .../src/lib/general/gr_multiply_conjugate_cc.h     | 57 ++++++++++++++++++
 .../src/lib/general/gr_multiply_conjugate_cc.i     | 32 ++++++++++
 gnuradio-core/src/python/gnuradio/gr/Makefile.am   |  1 +
 .../python/gnuradio/gr/qa_multiply_conjugate.py    | 57 ++++++++++++++++++
 9 files changed, 222 insertions(+), 4 deletions(-)
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h
 create mode 100644 gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i
 create mode 100644 gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py

diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index ab35aa66a..2bc639cd3 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -235,6 +235,7 @@ set(gr_core_general_triple_threats
     gr_multiply_ff
     gr_multiply_const_cc
     gr_multiply_const_ff
+    gr_multiply_conjugate_cc
     gr_nlog10_ff
     gr_nop
     gr_null_sink
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index 89152d663..ea3b31fd3 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -99,6 +99,7 @@ libgeneral_la_SOURCES = 		\
 	gr_multiply_ff.cc		\
 	gr_multiply_const_cc.cc		\
 	gr_multiply_const_ff.cc		\
+	gr_multiply_conjugate_cc.cc	\
 	gr_nlog10_ff.cc			\
 	gr_nop.cc			\
 	gr_null_sink.cc			\
@@ -259,6 +260,7 @@ grinclude_HEADERS = 			\
 	gr_multiply_ff.h		\
 	gr_multiply_const_cc.h		\
 	gr_multiply_const_ff.h		\
+	gr_multiply_conjugate_cc.h	\
 	gr_nco.h			\
 	gr_nlog10_ff.h			\
 	gr_nop.h			\
@@ -423,6 +425,7 @@ swiginclude_HEADERS =			\
 	gr_multiply_ff.i		\
 	gr_multiply_const_cc.i		\
 	gr_multiply_const_ff.i		\
+	gr_multiply_conjugate_cc.i	\
 	gr_nlog10_ff.i			\
 	gr_nop.i			\
 	gr_null_sink.i			\
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index c384ecfbb..89738b01a 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -110,6 +110,7 @@
 #include <gr_multiply_ff.h>
 #include <gr_multiply_const_cc.h>
 #include <gr_multiply_const_ff.h>
+#include <gr_multiply_conjugate_cc.h>
 #include <gr_feval.h>
 #include <gr_pwr_squelch_cc.h>
 #include <gr_pwr_squelch_ff.h>
@@ -231,6 +232,7 @@
 %include "gr_multiply_ff.i"
 %include "gr_multiply_const_cc.i"
 %include "gr_multiply_const_ff.i"
+%include "gr_multiply_conjugate_cc.i"
 %include "gr_feval.i"
 %include "gr_pwr_squelch_cc.i"
 %include "gr_pwr_squelch_ff.i"
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.cc b/gnuradio-core/src/lib/general/gr_add_ff.cc
index a5db5ec5c..fc5455c98 100644
--- a/gnuradio-core/src/lib/general/gr_add_ff.cc
+++ b/gnuradio-core/src/lib/general/gr_add_ff.cc
@@ -54,9 +54,6 @@ gr_add_ff::work(int noutput_items,
   int noi = d_vlen*noutput_items;
 
   memcpy(out, input_items[0], noi*sizeof(float));
-  for(size_t i = 1; i < input_items.size(); i++)
-    volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi);
-  /*
   if(is_unaligned()) {
     for(size_t i = 1; i < input_items.size(); i++)
       volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi);
@@ -65,6 +62,5 @@ gr_add_ff::work(int noutput_items,
     for(size_t i = 1; i < input_items.size(); i++)
       volk_32f_x2_add_32f_a(out, out, (const float*)input_items[i], noi);
   }
-  */
   return noutput_items;
 }
diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc
new file mode 100644
index 000000000..103d87b8b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_conjugate_cc.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_conjugate_cc_sptr
+gr_make_multiply_conjugate_cc (size_t vlen)
+{
+  return gnuradio::get_initial_sptr(new gr_multiply_conjugate_cc (vlen));
+}
+
+gr_multiply_conjugate_cc::gr_multiply_conjugate_cc (size_t vlen)
+  : gr_sync_block ("gr_multiply_conjugate_cc",
+		   gr_make_io_signature (2, 2, sizeof (gr_complex)*vlen),
+		   gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)),
+    d_vlen(vlen)
+{
+ const int alignment_multiple =
+   volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_multiply_conjugate_cc::work (int noutput_items,
+				gr_vector_const_void_star &input_items,
+				gr_vector_void_star &output_items)
+{
+  gr_complex *in0 = (gr_complex *) input_items[0];
+  gr_complex *in1 = (gr_complex *) input_items[1];
+  gr_complex *out = (gr_complex *) output_items[0];
+  int noi = d_vlen*noutput_items;
+
+  if(is_unaligned()) {
+    volk_32fc_x2_multiply_conjugate_32fc_u(out, in0, in1, noi);
+  }
+  else {
+    volk_32fc_x2_multiply_conjugate_32fc_a(out, in0, in1, noi);
+  }
+
+  return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h
new file mode 100644
index 000000000..eb032f31b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h
@@ -0,0 +1,57 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H
+#define INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_conjugate_cc;
+typedef boost::shared_ptr<gr_multiply_conjugate_cc>
+gr_multiply_conjugate_cc_sptr;
+
+GR_CORE_API gr_multiply_conjugate_cc_sptr
+gr_make_multiply_conjugate_cc (size_t vlen=1);
+
+/*!
+ * \brief Multiplies a stream by the conjugate of the second stream
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_conjugate_cc : public gr_sync_block
+{
+ private:
+  friend GR_CORE_API gr_multiply_conjugate_cc_sptr
+    gr_make_multiply_conjugate_cc (size_t vlen);
+  gr_multiply_conjugate_cc (size_t vlen);
+  
+  size_t d_vlen;
+
+ public:
+  virtual int work (int noutput_items,
+		    gr_vector_const_void_star &input_items,
+		    gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i
new file mode 100644
index 000000000..023410505
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_conjugate_cc)
+
+gr_multiply_conjugate_cc_sptr
+gr_make_multiply_conjugate_cc (size_t vlen=1);
+
+class gr_multiply_conjugate_cc : public gr_sync_block
+{
+public:
+
+};
diff --git a/gnuradio-core/src/python/gnuradio/gr/Makefile.am b/gnuradio-core/src/python/gnuradio/gr/Makefile.am
index 16dd14790..3c9edcf5b 100644
--- a/gnuradio-core/src/python/gnuradio/gr/Makefile.am
+++ b/gnuradio-core/src/python/gnuradio/gr/Makefile.am
@@ -79,6 +79,7 @@ noinst_PYTHON = 			\
 	qa_kludged_imports.py		\
 	qa_max.py			\
 	qa_message.py			\
+	qa_multiply_conjugate.py	\
 	qa_mute.py			\
 	qa_nlog10.py			\
 	qa_noise.py			\
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py b/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py
new file mode 100644
index 000000000..aaf3cc125
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+
+class test_multiply_conjugate (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_000 (self):
+        src_data0 = (-2-2j, -1-1j, -2+2j, -1+1j,
+                      2-2j,  1-1j,  2+2j,  1+1j,
+                      0+0j)
+        src_data1 = (-3-3j, -4-4j, -3+3j, -4+4j,
+                      3-3j,  4-4j,  3+3j,  4+4j,
+                      0+0j)
+        
+        exp_data = (12+0j, 8+0j, 12+0j, 8+0j, 
+                    12+0j, 8+0j, 12+0j, 8+0j,
+                    0+0j)
+        src0 = gr.vector_source_c(src_data0)
+        src1 = gr.vector_source_c(src_data1)
+        op = gr.multiply_conjugate_cc ()
+        dst = gr.vector_sink_c ()
+
+        self.tb.connect(src0, (op,0))
+        self.tb.connect(src1, (op,1))
+        self.tb.connect(op, dst)
+        self.tb.run()
+        result_data = dst.data ()
+        self.assertEqual (exp_data, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_multiply_conjugate, "test_multiply_conjugate.xml")
-- 
cgit 


From cdb328758dca9fa494956c0e62f5e78adf613982 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Feb 2012 16:59:50 -0500
Subject: volk: fixed complex multiply and conjugate kernel to use xor for
 conjugation.

---
 volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h | 5 ++---
 volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
index 70476a8c7..2a1bcbce0 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
@@ -24,15 +24,14 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVecto
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
 
-    __m128 conjugator = _mm_setr_ps(1, -1, 1, -1);
+    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
 
     for(;number < halfPoints; number++){
       
       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
 
-      // FIXME: replace with xor for a faster implementation
-      y = _mm_mul_ps(y, conjugator); // conjugate y
+      y = _mm_xor_ps(y, conjugator); // conjugate y
       
       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
index fbaa29c17..92f6a051e 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
@@ -24,7 +24,7 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVecto
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
 
-    __m128 conjugator = _mm_set_ps(0, 0x80000000, 0, 0x80000000);
+    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
 
     for(;number < halfPoints; number++){
       
-- 
cgit 


From 3080cd75a6a10aab757e1e02fb99e81e2f3724d5 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Feb 2012 17:23:44 -0500
Subject: volk: adding complex conjugate kernel.

---
 volk/apps/volk_profile.cc                      |  2 +
 volk/include/volk/Makefile.am                  |  5 +-
 volk/include/volk/volk_32fc_conjugate_32fc_a.h | 64 ++++++++++++++++++++++++++
 volk/include/volk/volk_32fc_conjugate_32fc_u.h | 64 ++++++++++++++++++++++++++
 volk/lib/testqa.cc                             |  2 +
 5 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 volk/include/volk/volk_32fc_conjugate_32fc_a.h
 create mode 100644 volk/include/volk/volk_32fc_conjugate_32fc_u.h

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 6ba7f17bb..0da21ffa5 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -58,6 +58,8 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results);
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index d071f18f2..f6b5835b1 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -134,4 +134,7 @@ volkinclude_HEADERS = \
 	volk_8i_convert_16i_a.h \
 	volk_8i_convert_16i_u.h \
 	volk_8i_s32f_convert_32f_a.h \
-	volk_8i_s32f_convert_32f_u.h 
+	volk_8i_s32f_convert_32f_u.h \
+	volk_32fc_conjugate_32fc_a.h \
+	volk_32fc_conjugate_32fc_u.h
+
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
new file mode 100644
index 000000000..1518af9be
--- /dev/null
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
@@ -0,0 +1,64 @@
+#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
+#define INCLUDED_volk_32fc_conjugate_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Takes the conjugate of a complex vector.
+    \param cVector The vector where the results will be stored
+    \param aVector Vector to be conjugated
+    \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+  */
+static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+
+    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+      x = _mm_xor_ps(x, conjugator); // conjugate register
+    
+      _mm_store_ps((float*)c,x); // Store the results back into the C container
+
+      a += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = lv_conj(*a);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Takes the conjugate of a complex vector.
+    \param cVector The vector where the results will be stored
+    \param aVector Vector to be conjugated
+    \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+  */
+static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = lv_conj(*aPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
new file mode 100644
index 000000000..b26fe0789
--- /dev/null
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
@@ -0,0 +1,64 @@
+#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
+#define INCLUDED_volk_32fc_conjugate_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+  /*!
+    \brief Takes the conjugate of a complex vector.
+    \param cVector The vector where the results will be stored
+    \param aVector Vector to be conjugated
+    \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+  */
+static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    __m128 x;
+    lv_32fc_t* c = cVector;
+    const lv_32fc_t* a = aVector;
+  
+    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+    for(;number < halfPoints; number++){
+      
+      x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
+      
+      x = _mm_xor_ps(x, conjugator); // conjugate register
+
+      _mm_storeu_ps((float*)c,x); // Store the results back into the C container
+
+      a += 2;
+      c += 2;
+    }
+
+    if((num_points % 2) != 0) {
+      *c = lv_conj(*a);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+  /*!
+    \brief Takes the conjugate of a complex vector.
+    \param cVector The vector where the results will be stored
+    \param aVector Vector to be conjugated
+    \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+  */
+static inline void volk_32fc_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+      *cPtr++ = lv_conj(*aPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index fdd3d4853..593087f85 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -93,6 +93,8 @@ VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1);
 VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1);
-- 
cgit 


From 75bb99df4720789749c059a0207507a3cbdd3855 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Feb 2012 17:49:36 -0500
Subject: core: using volk for conjugate block and added QA code for it.

---
 gnuradio-core/src/lib/general/gr_conjugate_cc.cc   | 27 ++++-------
 gnuradio-core/src/python/gnuradio/gr/Makefile.am   |  1 +
 .../src/python/gnuradio/gr/qa_conjugate.py         | 53 ++++++++++++++++++++++
 3 files changed, 62 insertions(+), 19 deletions(-)
 create mode 100644 gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py

diff --git a/gnuradio-core/src/lib/general/gr_conjugate_cc.cc b/gnuradio-core/src/lib/general/gr_conjugate_cc.cc
index 59c3bae89..d2b20ffe6 100644
--- a/gnuradio-core/src/lib/general/gr_conjugate_cc.cc
+++ b/gnuradio-core/src/lib/general/gr_conjugate_cc.cc
@@ -28,6 +28,7 @@
 
 #include <gr_conjugate_cc.h>
 #include <gr_io_signature.h>
+#include <volk/volk.h>
 
 gr_conjugate_cc_sptr
 gr_make_conjugate_cc ()
@@ -40,6 +41,9 @@ gr_conjugate_cc::gr_conjugate_cc ()
 		   gr_make_io_signature (1, 1, sizeof (gr_complex)),
 		   gr_make_io_signature (1, 1, sizeof (gr_complex)))
 {
+  // Clamp to 1: the quotient is 0 if alignment < sizeof(gr_complex).
+  const int alignment_multiple = volk_get_alignment() / sizeof(gr_complex);
+  set_alignment(alignment_multiple < 1 ? 1 : alignment_multiple);
 }
 
 int
@@ -50,26 +54,11 @@ gr_conjugate_cc::work (int noutput_items,
   gr_complex *iptr = (gr_complex *) input_items[0];
   gr_complex *optr = (gr_complex *) output_items[0];
 
-  int	size = noutput_items;
-
-  while (size >= 8){
-    optr[0] = conj(iptr[0]);
-    optr[1] = conj(iptr[1]);
-    optr[2] = conj(iptr[2]);
-    optr[3] = conj(iptr[3]);
-    optr[4] = conj(iptr[4]);
-    optr[5] = conj(iptr[5]);
-    optr[6] = conj(iptr[6]);
-    optr[7] = conj(iptr[7]);
-    size -= 8;
-    optr += 8;
-    iptr += 8;
+  if(is_unaligned()) {
+    volk_32fc_conjugate_32fc_u(optr, iptr, noutput_items);
   }
-
-  while (size-- > 0) {
-    *optr = conj(*iptr);
-    iptr++;
-    optr++;
+  else {
+    volk_32fc_conjugate_32fc_a(optr, iptr, noutput_items);
   }
 
   return noutput_items;
diff --git a/gnuradio-core/src/python/gnuradio/gr/Makefile.am b/gnuradio-core/src/python/gnuradio/gr/Makefile.am
index 3c9edcf5b..9853766f9 100644
--- a/gnuradio-core/src/python/gnuradio/gr/Makefile.am
+++ b/gnuradio-core/src/python/gnuradio/gr/Makefile.am
@@ -50,6 +50,7 @@ noinst_PYTHON = 			\
 	qa_bin_statistics.py		\
 	qa_classify.py			\
 	qa_complex_to_xxx.py		\
+	qa_conjugate.py			\
 	qa_copy.py			\
 	qa_delay.py			\
 	qa_dc_blocker.py		\
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py b/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py
new file mode 100644
index 000000000..c07902a5a
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+from gnuradio import gr, gr_unittest
+
+class test_conjugate (gr_unittest.TestCase):
+
+    def setUp (self):
+        self.tb = gr.top_block ()
+
+    def tearDown (self):
+        self.tb = None
+
+    def test_000 (self):
+        src_data = (-2-2j, -1-1j, -2+2j, -1+1j,
+                     2-2j,  1-1j,  2+2j,  1+1j,
+                     0+0j)
+        
+        exp_data = (-2+2j, -1+1j, -2-2j, -1-1j,
+                     2+2j,  1+1j,  2-2j,  1-1j,
+                     0-0j)
+        
+        src = gr.vector_source_c(src_data)
+        op = gr.conjugate_cc ()
+        dst = gr.vector_sink_c ()
+
+        self.tb.connect(src, op)
+        self.tb.connect(op, dst)
+        self.tb.run()
+        result_data = dst.data ()
+        self.assertEqual (exp_data, result_data)
+
+if __name__ == '__main__':
+    gr_unittest.run(test_conjugate, "test_conjugate.xml")
-- 
cgit 


From 786058aacbe0ca662e14ea5f00f1c0872a599577 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Feb 2012 18:32:09 -0500
Subject: volk: adding an examples directory with scripts to benchmark and
 compare volk-optimized GR blocks.

---
 .../python/volk_benchmark/volk_math.py             | 146 ++++++++++++++++++
 .../python/volk_benchmark/volk_plot.py             |  81 ++++++++++
 .../python/volk_benchmark/volk_test_funcs.py       | 171 +++++++++++++++++++++
 3 files changed, 398 insertions(+)
 create mode 100755 gnuradio-examples/python/volk_benchmark/volk_math.py
 create mode 100755 gnuradio-examples/python/volk_benchmark/volk_plot.py
 create mode 100644 gnuradio-examples/python/volk_benchmark/volk_test_funcs.py

diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py
new file mode 100755
index 000000000..ec85ce0ad
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_math.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+
+from gnuradio import gr
+import argparse
+from volk_test_funcs import *
+
+def multiply_const_cc(N):
+    k = 3.3
+    op = gr.multiply_const_cc(k)
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 1, 1)
+    return tb
+
+######################################################################
+
+def multiply_const_ff(N):
+    k = 3.3
+    op = gr.multiply_const_ff(k)
+    tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def multiply_cc(N):
+    op = gr.multiply_cc()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+    return tb
+
+######################################################################
+
+def multiply_ff(N):
+    op = gr.multiply_ff()
+    tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 2, 1)
+    return tb
+
+######################################################################
+
+def add_ff(N):
+    op = gr.add_ff()
+    tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 2, 1)
+    return tb
+
+######################################################################
+
+def conjugate_cc(N):
+    op = gr.conjugate_cc()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 1, 1)
+    return tb
+
+######################################################################
+
+def multiply_conjugate_cc_volk(N):
+    op = gr.multiply_conjugate_cc()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+    return tb
+
+def multiply_conjugate_cc_nonvolk(N):
+    class s(gr.hier_block2):
+        def __init__(self):
+            gr.hier_block2.__init__(self, "s",
+                                    gr.io_signature(2, 2, gr.sizeof_gr_complex),
+                                    gr.io_signature(1, 1, gr.sizeof_gr_complex))
+            conj = gr.conjugate_cc()
+            mult = gr.multiply_cc()
+            self.connect((self,0), (mult,0))
+            self.connect((self,1), conj, (mult,1))
+            self.connect(mult, self)
+
+    op = s()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+    return tb
+
+#multiply_conjugate_cc = multiply_conjugate_cc_volk
+multiply_conjugate_cc = multiply_conjugate_cc_nonvolk
+
+######################################################################
+
+def run_tests(func, N, iters):
+    print("Running Test: {0}".format(func.__name__))
+    tb = func(N)
+    t = timeit(tb, iters)
+    res = format_results(func.__name__, t)
+    return res
+
+def main():
+    avail_tests = [multiply_const_cc,
+                   multiply_const_ff,
+                   multiply_cc,
+                   multiply_ff,
+                   add_ff,
+                   conjugate_cc,
+                   multiply_conjugate_cc]
+
+    desc='Time an operation to compare with other implementations. \
+          This program runs a simple GNU Radio flowgraph to test a \
+          particular math function, mostly to compare the  \
+          Volk-optimized implementation versus a regular \
+          implementation. The results are stored to an SQLite database \
+          that can then be read by volk_plot.py to plot the differences.'
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('label', type=str,
+                        default=None,
+                        help='Label of database table [default: %(default)s]')
+    parser.add_argument('-D', '--database', type=str,
+                        default="volk_results.db",
+                        help='Database file to store data in [default: %(default)s]')
+    parser.add_argument('-N', '--nitems', type=float,
+                        default=1e9,
+                        help='Number of items per iterations [default: %(default)s]')
+    parser.add_argument('-I', '--iterations', type=int,
+                        default=20,
+                        help='Number of iterations [default: %(default)s]')
+    parser.add_argument('--test', type=int,
+                        choices=xrange(len(avail_tests)),
+                        help='Test to run')
+    parser.add_argument('--list', action='store_true',
+                        help='List the available tests')
+    parser.add_argument('--all', action='store_true',
+                        help='Run all tests')
+    args = parser.parse_args()
+
+    if(args.list):
+        print "Available Tests to Run:"
+        print "\n".join(["\t{0}: {1}".format(i,f.__name__) for i,f in enumerate(avail_tests)])
+        sys.exit(0)      
+
+    N = int(args.nitems)
+    iters = args.iterations
+    label = args.label
+
+    conn = create_connection(args.database)
+    new_table(conn, label)
+
+    if not args.all:
+        func = avail_tests[args.test]
+        res = run_tests(func, N, iters)
+        replace_results(conn, label, N, iters, res)
+    else:
+        for f in avail_tests:
+            res = run_tests(f, N, iters)
+            replace_results(conn, label, N, iters, res)
+            
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        pass
diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py
new file mode 100755
index 000000000..665df5e14
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+import sys
+import argparse
+from volk_test_funcs import *
+
+try:
+    import matplotlib
+    import matplotlib.pyplot as plt
+except ImportError:
+    sys.stderr.write("Could not import Matplotlib (http://matplotlib.sourceforge.net/)\n")
+    sys.exit(1)
+
+def main():
+    desc='Plot Volk performance results from a SQLite database. ' + \
+        'Run one of the volk tests first (e.g, volk_math.py)'
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('-D', '--database', type=str,
+                        default="volk_results.db",
+                        help='Database file to read data from [default: %(default)s]')
+    args = parser.parse_args()
+    
+    # Set up global plotting properties
+    matplotlib.rcParams['figure.subplot.bottom'] = 0.2
+    matplotlib.rcParams['figure.subplot.top'] = 0.95
+    matplotlib.rcParams['ytick.labelsize'] = 16
+    matplotlib.rcParams['xtick.labelsize'] = 16
+    matplotlib.rcParams['legend.fontsize'] = 18
+    
+    # Get list of tables to compare
+    conn = create_connection(args.database)
+    tables = list_tables(conn)
+    M = len(tables)
+
+    # width of bars depends on number of comparisons
+    wdth = 0.80/M
+
+    colors = ['b', 'r', 'g', 'm', 'k']
+
+    # Set up figure for plotting
+    f0 = plt.figure(0, facecolor='w', figsize=(14,10))
+    s0 = f0.add_subplot(1,1,1)
+
+    for i,table in enumerate(tables):
+        # Get results from the next table
+        res = get_results(conn, table[0])
+    
+        xlabels = []
+        averages = []
+        variances = []
+        maxes = []
+        mins = []
+        for r in res:
+            xlabels.append(r['kernel'])
+            averages.append(r['avg'])
+            variances.append(r['var'])
+            maxes.append(r['max'])
+            mins.append(r['min'])
+
+        # makes x values for this data set placement
+        x0 = xrange(len(res))
+        x1 = [x + i*wdth for x in x0]
+
+        s0.bar(x1, averages, width=wdth,
+               #yerr=variances,
+               color=colors[i%M], label=table[0],
+               edgecolor='k', linewidth=2)
+
+    s0.legend()
+    s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']),
+                  fontsize=22, fontweight='bold')
+    s0.set_xticks(x0)
+    s0.set_xticklabels(xlabels)
+    for label in s0.xaxis.get_ticklabels():
+        label.set_rotation(45)
+        label.set_fontsize(16)
+
+    plt.show()
+
+if __name__ == "__main__":
+    main()
diff --git a/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py b/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py
new file mode 100644
index 000000000..4f4e4afd3
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+from gnuradio import gr
+import math, sys, os, time
+
+try:
+    import scipy
+except ImportError:
+    sys.stderr.write("Unable to import Scipy (www.scipy.org)\n")
+    sys.exit(1)
+
+try:
+    import sqlite3
+except ImportError:
+    sys.stderr.write("Unable to import sqlite3: requires Python 2.5\n")
+    sys.exit(1)
+
+def execute(conn, cmd):
+    '''
+    Executes the command cmd to the database opened in connection conn.
+    '''
+    c = conn.cursor()
+    c.execute(cmd)
+    conn.commit()
+    c.close()
+
+def create_connection(database):
+    '''
+    Returns a connection object to the SQLite database.
+    '''
+    return sqlite3.connect(database)
+
+def new_table(conn, tablename):
+    '''
+    Create a new table for results.
+    All results are in the form: [kernel | nitems | iters | avg. time | variance | max time | min time ]
+    Each table is meant as a different setting (e.g., volk_aligned, volk_unaligned, etc.)
+    '''
+    cols = "kernel text, nitems int, iters int, avg real, var real, max real, min real"
+    cmd = "create table if not exists {0} ({1})".format(
+        tablename, cols)
+    execute(conn, cmd)
+
+def replace_results(conn, tablename, nitems, iters, res):
+    '''
+    Inserts or replaces the results 'res' dictionary values into the table.
+    This deletes all old entries of the kernel in this table.
+    '''
+    cmd = "DELETE FROM {0} where kernel='{1}'".format(tablename, res["kernel"])
+    execute(conn, cmd)
+    insert_results(conn, tablename, nitems, iters, res)
+
+def insert_results(conn, tablename, nitems, iters, res):
+    '''
+    Inserts the results dictionary values into the table.
+    '''
+    cols = "kernel, nitems, iters, avg, var, max, min"
+    cmd = "INSERT INTO {0} ({1}) VALUES ('{2}', {3}, {4}, {5}, {6}, {7}, {8})".format(
+        tablename, cols, res["kernel"], nitems, iters, 
+        res["avg"], res["var"], res["max"], res["min"])
+    execute(conn, cmd)
+
+def list_tables(conn):
+    '''
+    Returns a list of all tables in the database.
+    '''
+    cmd = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
+    c = conn.cursor()
+    c.execute(cmd)
+    t = c.fetchall()
+    c.close()
+
+    return t
+    
+def get_results(conn, tablename):
+    '''
+    Gets all results in tablename.
+
+    Returns a list with one dictionary per row, keyed by column name.
+    '''
+    cmd = "SELECT * FROM {0}".format(tablename)
+    c = conn.cursor()
+    c.execute(cmd)
+    fetched = c.fetchall()
+    c.close()
+
+    # Index order must match the column order used by new_table() and
+    # insert_results(): kernel, nitems, iters, avg, var, max, min.
+    res = list()
+    for f in fetched:
+        r = dict()
+        r['kernel'], r['nitems'], r['iters'] = f[0], f[1], f[2]
+        r['avg'], r['var'] = f[3], f[4]
+        r['max'], r['min'] = f[5], f[6]
+        res.append(r)
+
+    return res
+    
+
+class helper(gr.top_block):
+    '''
+    Helper top block to run the tests. The parameters are:
+      N: number of items to process (int)
+      op: The GR block/hier_block to test
+      isizeof: the sizeof the input type
+      osizeof: the sizeof the output type
+      nsrcs: number of inputs to the op
+      nsnks: number of outputs of the op
+
+    This class can only handle blocks where all inputs are the same
+    datatype and all outputs are the same data type
+    '''
+    def __init__(self, N, op,
+                 isizeof=gr.sizeof_gr_complex,
+                 osizeof=gr.sizeof_gr_complex,
+                 nsrcs=1, nsnks=1):
+        gr.top_block.__init__(self, "helper")
+
+        self.op = op
+        self.srcs = []
+        self.snks = []
+        self.head = gr.head(isizeof, N)
+
+        for n in xrange(nsrcs):
+            self.srcs.append(gr.null_source(isizeof))
+
+        for n in xrange(nsnks):
+            self.snks.append(gr.null_sink(osizeof))
+
+        self.connect(self.srcs[0], self.head, (self.op,0))  # only input 0 goes through self.head
+
+        for n in xrange(1, nsrcs):
+            self.connect(self.srcs[n], (self.op,n))
+
+        for n in xrange(nsnks):
+            self.connect((self.op,n), self.snks[n])
+
+def timeit(tb, iterations):
+    '''
+    Given a top block, this function times it for a number of
+    iterations and stores the time in a list that is returned.
+    '''
+    r = gr.enable_realtime_scheduling()
+    if r != gr.RT_OK:
+        print "Warning: failed to enable realtime scheduling"
+
+    times = []
+    for i in xrange(iterations):
+        start_time = time.time()
+        tb.run()
+        end_time = time.time()
+        tb.head.reset()
+
+        times.append(end_time - start_time)
+
+    return times
+
+def format_results(kernel, times):
+    '''
+    Convenience function to convert the list of times from the timeit
+    function into a dictionary (kernel name, avg, var, max and min).
+    '''
+    res = dict()
+    res["kernel"] = kernel
+    res["avg"] = scipy.mean(times)
+    res["var"] = scipy.var(times)
+    res["max"] = max(times)
+    res["min"] = min(times)
+    return res
+
+
-- 
cgit 


From 5a07519b2685fabab3e75380657a53d2161dc1a2 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Feb 2012 22:17:28 -0500
Subject: core: fixed alignment call for char_to_short.

---
 gnuradio-core/src/lib/general/gr_char_to_short.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc
index 40ffa9338..13375412c 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc
@@ -41,7 +41,7 @@ gr_char_to_short::gr_char_to_short (size_t vlen)
     d_vlen(vlen)
 {
   const int alignment_multiple =
-    volk_get_alignment() / sizeof(float);
+    volk_get_alignment() / sizeof(short);
   set_alignment(alignment_multiple);
 }
 
-- 
cgit 


From 4c048e77d0f7f78cd684534133a9312be936fcc6 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Feb 2012 22:18:00 -0500
Subject: volk: test commands for measuring type conversion performance.

---
 .../python/volk_benchmark/volk_types.py            | 183 +++++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100755 gnuradio-examples/python/volk_benchmark/volk_types.py

diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py
new file mode 100755
index 000000000..3dd10ae96
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_types.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+
+from gnuradio import gr
+import argparse
+from volk_test_funcs import *
+
+######################################################################
+
+def float_to_char(N):
+    op = gr.float_to_char()
+    tb = helper(N, op, gr.sizeof_float, gr.sizeof_char, 1, 1)
+    return tb
+
+######################################################################
+
+def float_to_int(N):
+    op = gr.float_to_int()
+    tb = helper(N, op, gr.sizeof_float, gr.sizeof_int, 1, 1)
+    return tb
+
+######################################################################
+
+def float_to_short(N):
+    op = gr.float_to_short()
+    tb = helper(N, op, gr.sizeof_float, gr.sizeof_short, 1, 1)
+    return tb
+
+######################################################################
+
+def short_to_float(N):
+    op = gr.short_to_float()
+    tb = helper(N, op, gr.sizeof_short, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def short_to_char(N):
+    op = gr.short_to_char()
+    tb = helper(N, op, gr.sizeof_short, gr.sizeof_char, 1, 1)
+    return tb
+
+######################################################################
+
+def char_to_short(N):
+    op = gr.char_to_short()
+    tb = helper(N, op, gr.sizeof_char, gr.sizeof_short, 1, 1)
+    return tb
+
+######################################################################
+
+def char_to_float(N):
+    op = gr.char_to_float()
+    tb = helper(N, op, gr.sizeof_char, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def int_to_float(N):
+    op = gr.int_to_float()
+    tb = helper(N, op, gr.sizeof_int, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def complex_to_float(N):
+    op = gr.complex_to_float()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 2)
+    return tb
+
+######################################################################
+
+def complex_to_real(N):
+    op = gr.complex_to_real()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def complex_to_imag(N):
+    op = gr.complex_to_imag()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def complex_to_mag(N):
+    op = gr.complex_to_mag()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def complex_to_mag_squared(N):
+    op = gr.complex_to_mag_squared()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def complex_to_arg(N):
+    op = gr.complex_to_arg()
+    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+    return tb
+
+######################################################################
+
+def run_tests(func, N, iters):
+    print("Running Test: {0}".format(func.__name__))
+    tb = func(N)
+    t = timeit(tb, iters)
+    res = format_results(func.__name__, t)
+    return res
+
+def main():
+    avail_tests = [float_to_char,
+                   float_to_int,
+                   float_to_short,
+                   short_to_float,
+                   short_to_char,
+                   char_to_short,
+                   char_to_float,
+                   int_to_float,
+                   complex_to_float,
+                   complex_to_real,
+                   complex_to_imag,
+                   complex_to_mag,
+                   complex_to_mag_squared,
+                   complex_to_arg]
+
+    desc='Time an operation to compare with other implementations. \
+          This program runs a simple GNU Radio flowgraph to test a \
+          particular math function, mostly to compare the  \
+          Volk-optimized implementation versus a regular \
+          implementation. The results are stored to an SQLite database \
+          that can then be read by volk_plot.py to plot the differences.'
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('label', type=str,
+                        default=None,
+                        help='Label of database table [default: %(default)s]')
+    parser.add_argument('-D', '--database', type=str,
+                        default="volk_results.db",
+                        help='Database file to store data in [default: %(default)s]')
+    parser.add_argument('-N', '--nitems', type=float,
+                        default=1e9,
+                        help='Number of items per iterations [default: %(default)s]')
+    parser.add_argument('-I', '--iterations', type=int,
+                        default=20,
+                        help='Number of iterations [default: %(default)s]')
+    parser.add_argument('--test', type=int,
+                        choices=xrange(len(avail_tests)),
+                        help='Test to run')
+    parser.add_argument('--list', action='store_true',
+                        help='List the available tests')
+    parser.add_argument('--all', action='store_true',
+                        help='Run all tests')
+    args = parser.parse_args()
+
+    if(args.list):
+        print "Available Tests to Run:"
+        print "\n".join(["\t{0}: {1}".format(i,f.__name__) for i,f in enumerate(avail_tests)])
+        sys.exit(0)      
+
+    N = int(args.nitems)
+    iters = args.iterations
+    label = args.label
+
+    conn = create_connection(args.database)
+    new_table(conn, label)
+
+    if not args.all:
+        func = avail_tests[args.test]
+        res = run_tests(func, N, iters)
+        replace_results(conn, label, N, iters, res)
+    else:
+        for f in avail_tests:
+            res = run_tests(f, N, iters)
+            replace_results(conn, label, N, iters, res)
+            
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        pass
-- 
cgit 


From a9a2c632040d37562a64eb81ed7d4f136a7a774e Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 9 Feb 2012 20:49:44 -0500
Subject: volk: improved GR benchmark and plotting utilities.

---
 .../python/volk_benchmark/volk_math.py             | 61 ++++++++--------
 .../python/volk_benchmark/volk_plot.py             | 84 +++++++++++++++++-----
 .../python/volk_benchmark/volk_types.py            | 18 +++--
 3 files changed, 110 insertions(+), 53 deletions(-)

diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py
index ec85ce0ad..42f3ffa4b 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_math.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_math.py
@@ -48,38 +48,41 @@ def conjugate_cc(N):
 
 ######################################################################
 
-def multiply_conjugate_cc_volk(N):
-    op = gr.multiply_conjugate_cc()
-    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
-    return tb
-
-def multiply_conjugate_cc_nonvolk(N):
-    class s(gr.hier_block2):
-        def __init__(self):
-            gr.hier_block2.__init__(self, "s",
-                                    gr.io_signature(2, 2, gr.sizeof_gr_complex),
-                                    gr.io_signature(1, 1, gr.sizeof_gr_complex))
-            conj = gr.conjugate_cc()
-            mult = gr.multiply_cc()
-            self.connect((self,0), (mult,0))
-            self.connect((self,1), conj, (mult,1))
-            self.connect(mult, self)
-
-    op = s()
-    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
-    return tb
+def multiply_conjugate_cc(N):
+    try:
+        op = gr.multiply_conjugate_cc()
+        tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+        return tb
+
+    except AttributeError:
+        class s(gr.hier_block2):
+            def __init__(self):
+                gr.hier_block2.__init__(self, "s",
+                                        gr.io_signature(2, 2, gr.sizeof_gr_complex),
+                                        gr.io_signature(1, 1, gr.sizeof_gr_complex))
+                conj = gr.conjugate_cc()
+                mult = gr.multiply_cc()
+                self.connect((self,0), (mult,0))
+                self.connect((self,1), conj, (mult,1))
+                self.connect(mult, self)
+
+        op = s()
+        tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+        return tb
 
-#multiply_conjugate_cc = multiply_conjugate_cc_volk
-multiply_conjugate_cc = multiply_conjugate_cc_nonvolk
 
 ######################################################################
 
 def run_tests(func, N, iters):
     print("Running Test: {0}".format(func.__name__))
-    tb = func(N)
-    t = timeit(tb, iters)
-    res = format_results(func.__name__, t)
-    return res
+    try:
+        tb = func(N)
+        t = timeit(tb, iters)
+        res = format_results(func.__name__, t)
+        return res
+    except AttributeError:
+        print "\tCould not run test. Skipping."
+        return None
 
 def main():
     avail_tests = [multiply_const_cc,
@@ -133,11 +136,13 @@ def main():
     if not args.all:
         func = avail_tests[args.test]
         res = run_tests(func, N, iters)
-        replace_results(conn, label, N, iters, res)
+        if res is not None:
+            replace_results(conn, label, N, iters, res)
     else:
         for f in avail_tests:
             res = run_tests(f, N, iters)
-            replace_results(conn, label, N, iters, res)
+            if res is not None:
+                replace_results(conn, label, N, iters, res)
             
 if __name__ == "__main__":
     try:
diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py
index 665df5e14..d7578c5a7 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_plot.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-import sys
+import sys, math
 import argparse
 from volk_test_funcs import *
 
@@ -16,8 +16,15 @@ def main():
         'Run one of the volk tests first (e.g, volk_math.py)'
     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument('-D', '--database', type=str,
-                        default="volk_results.db",
+                        default='volk_results.db',
                         help='Database file to read data from [default: %(default)s]')
+    parser.add_argument('-E', '--errorbars',
+                        action='store_true', default=False,
+                        help='Show error bars (1 standard dev.)')
+    parser.add_argument('-P', '--plot', type=str,
+                        choices=['mean', 'min', 'max'],
+                        default='mean',
+                        help='Set the type of plot to produce [default: %(default)s]')
     args = parser.parse_args()
     
     # Set up global plotting properties
@@ -35,42 +42,81 @@ def main():
     # width of bars depends on number of comparisons
     wdth = 0.80/M
 
+    # Colors to distinguish each table in the bar graph
+    # More than 5 tables will wrap around to the start.
     colors = ['b', 'r', 'g', 'm', 'k']
 
     # Set up figure for plotting
     f0 = plt.figure(0, facecolor='w', figsize=(14,10))
     s0 = f0.add_subplot(1,1,1)
 
+    # Create a register of names that exist in all tables
+    tmp_regs = []
+    for table in tables:
+        # Get results from the next table
+        res = get_results(conn, table[0])
+
+        tmp_regs.append(list())
+        for r in res:
+            try:
+                tmp_regs[-1].index(r['kernel'])
+            except ValueError:
+                tmp_regs[-1].append(r['kernel'])
+
+    # Get only those names that are common in all tables            
+    name_reg = tmp_regs[0]
+    for t in tmp_regs[1:]:
+        name_reg = list(set(name_reg) & set(t))
+    name_reg.sort()
+
+    # Pull the data out for each table into a dictionary
+    # we can ref the table by its name and the data associated
+    # with a given kernel in name_reg by its name.
+    # This ensures there is no sorting issue with the data in the
+    # dictionary, so the kernels are plotted against each other.
+    table_data = dict()
     for i,table in enumerate(tables):
         # Get results from the next table
         res = get_results(conn, table[0])
-    
-        xlabels = []
-        averages = []
-        variances = []
-        maxes = []
-        mins = []
+
+        data = dict()
         for r in res:
-            xlabels.append(r['kernel'])
-            averages.append(r['avg'])
-            variances.append(r['var'])
-            maxes.append(r['max'])
-            mins.append(r['min'])
+            data[r['kernel']] = r
+
+        table_data[table[0]] = data
 
+    # Plot the results
+    x0 = xrange(len(name_reg))
+    for i,t in enumerate(table_data):
         # makes x values for this data set placement
-        x0 = xrange(len(res))
         x1 = [x + i*wdth for x in x0]
 
-        s0.bar(x1, averages, width=wdth,
-               #yerr=variances,
-               color=colors[i%M], label=table[0],
-               edgecolor='k', linewidth=2)
+        ydata = []
+        stds = []
+        for name in name_reg:
+            stds.append(math.sqrt(table_data[t][name]['var']))
+            if(args.plot == 'max'):
+                ydata.append(table_data[t][name]['max'])
+            elif(args.plot == 'min'):
+                ydata.append(table_data[t][name]['min'])
+            if(args.plot == 'mean'):
+                ydata.append(table_data[t][name]['avg'])
+
+        if(args.errorbars is False):
+            stds = None
+
+        s0.bar(x1, ydata, width=wdth,
+               yerr=stds,
+               color=colors[i%M], label=t,
+               edgecolor='k', linewidth=2,
+               error_kw={"ecolor": 'k', "capsize":5,
+                         "linewidth":2})
 
     s0.legend()
     s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']),
                   fontsize=22, fontweight='bold')
     s0.set_xticks(x0)
-    s0.set_xticklabels(xlabels)
+    s0.set_xticklabels(name_reg)
     for label in s0.xaxis.get_ticklabels():
         label.set_rotation(45)
         label.set_fontsize(16)
diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py
index 3dd10ae96..8041ccac1 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_types.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_types.py
@@ -106,10 +106,14 @@ def complex_to_arg(N):
 
 def run_tests(func, N, iters):
     print("Running Test: {0}".format(func.__name__))
-    tb = func(N)
-    t = timeit(tb, iters)
-    res = format_results(func.__name__, t)
-    return res
+    try:
+        tb = func(N)
+        t = timeit(tb, iters)
+        res = format_results(func.__name__, t)
+        return res
+    except AttributeError:
+        print "\tCould not run test. Skipping."
+        return None
 
 def main():
     avail_tests = [float_to_char,
@@ -170,11 +174,13 @@ def main():
     if not args.all:
         func = avail_tests[args.test]
         res = run_tests(func, N, iters)
-        replace_results(conn, label, N, iters, res)
+        if res is not None:
+            replace_results(conn, label, N, iters, res)
     else:
         for f in avail_tests:
             res = run_tests(f, N, iters)
-            replace_results(conn, label, N, iters, res)
+            if res is not None:
+                replace_results(conn, label, N, iters, res)
             
 if __name__ == "__main__":
     try:
-- 
cgit 


From 84cb8f63d0d96ede1a6a10940112ae5a087029fc Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 9 Feb 2012 23:23:36 -0500
Subject: volk: better args for benchmarking volk tests; can specify a list of
 test numbers.

---
 .../python/volk_benchmark/volk_math.py             |  9 ++++----
 .../python/volk_benchmark/volk_types.py            | 24 +++++++++++-----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py
index 42f3ffa4b..8b0081387 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_math.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_math.py
@@ -100,8 +100,8 @@ def main():
           implementation. The results are stored to an SQLite database \
           that can then be read by volk_plot.py to plot the differences.'
     parser = argparse.ArgumentParser(description=desc)
-    parser.add_argument('label', type=str,
-                        default=None,
+    parser.add_argument('-L', '--label', type=str,
+                        required=True, default=None,
                         help='Label of database table [default: %(default)s]')
     parser.add_argument('-D', '--database', type=str,
                         default="volk_results.db",
@@ -112,9 +112,10 @@ def main():
     parser.add_argument('-I', '--iterations', type=int,
                         default=20,
                         help='Number of iterations [default: %(default)s]')
-    parser.add_argument('--test', type=int,
+    parser.add_argument('--tests', type=int, nargs='*',
                         choices=xrange(len(avail_tests)),
-                        help='Test to run')
+                        help='A list of tests to run; can be a single test or a \
+                              space-separated list.')
     parser.add_argument('--list', action='store_true',
                         help='List the available tests')
     parser.add_argument('--all', action='store_true',
diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py
index 8041ccac1..893318ddd 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_types.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_types.py
@@ -138,8 +138,8 @@ def main():
           implementation. The results are stored to an SQLite database \
           that can then be read by volk_plot.py to plot the differences.'
     parser = argparse.ArgumentParser(description=desc)
-    parser.add_argument('label', type=str,
-                        default=None,
+    parser.add_argument('-L', '--label', type=str,
+                        required=True, default=None,
                         help='Label of database table [default: %(default)s]')
     parser.add_argument('-D', '--database', type=str,
                         default="volk_results.db",
@@ -150,9 +150,10 @@ def main():
     parser.add_argument('-I', '--iterations', type=int,
                         default=20,
                         help='Number of iterations [default: %(default)s]')
-    parser.add_argument('--test', type=int,
+    parser.add_argument('--tests', type=int, nargs='*',
                         choices=xrange(len(avail_tests)),
-                        help='Test to run')
+                        help='A list of tests to run; can be a single test or a \
+                              space-separated list.')
     parser.add_argument('--list', action='store_true',
                         help='List the available tests')
     parser.add_argument('--all', action='store_true',
@@ -171,16 +172,15 @@ def main():
     conn = create_connection(args.database)
     new_table(conn, label)
 
-    if not args.all:
-        func = avail_tests[args.test]
-        res = run_tests(func, N, iters)
+    if args.all:
+        tests = xrange(len(avail_tests))
+    else:
+        tests = args.tests
+
+    for test in tests:
+        res = run_tests(avail_tests[test], N, iters)
         if res is not None:
             replace_results(conn, label, N, iters, res)
-    else:
-        for f in avail_tests:
-            res = run_tests(f, N, iters)
-            if res is not None:
-                replace_results(conn, label, N, iters, res)
             
 if __name__ == "__main__":
     try:
-- 
cgit 


From f671319ca9ccef8fb1590e676ff6bcb85d7ca5a1 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 11 Feb 2012 09:06:45 -0500
Subject: core: reverting float_to_int to non-Volk due to precision/wrapping
 issues.

Using the Volk function causes too much of a change in the output values right now. Will have to relook at it for the right thing to do. Keeping the use of vlen and scale, though.
---
 gnuradio-core/src/lib/general/gr_float_to_int.cc       | 18 ++++++++++++++----
 gnuradio-core/src/lib/general/gri_float_to_int.cc      |  4 ++--
 gnuradio-core/src/lib/general/gri_float_to_int.h       |  2 +-
 .../src/python/gnuradio/gr/qa_float_to_int.py          | 14 ++++++--------
 4 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc
index 28214538f..b69591043 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc
@@ -26,6 +26,7 @@
 
 #include <gr_float_to_int.h>
 #include <gr_io_signature.h>
+#include <gri_float_to_int.h>
 #include <volk/volk.h>
 
 gr_float_to_int_sptr
@@ -56,12 +57,17 @@ gr_float_to_int::set_scale(float scale)
 {
   d_scale = scale;
 }
-
 int
 gr_float_to_int::work (int noutput_items,
 		       gr_vector_const_void_star &input_items,
 		       gr_vector_void_star &output_items)
 {
+  // Disable the Volk call for now. There is a problem for large 32-bit ints
+  // that are not properly represented by the precision of a single float,
+  // which can cause wrapping from large, positive numbers to negative.
+  // In gri_float_to_int, the value is first promoted to a 64-bit
+  // value, clipped, then converted to an int.
+#if 0
   const float *in = (const float *) input_items[0];
   int32_t *out = (int32_t *) output_items[0];
 
@@ -71,9 +77,13 @@ gr_float_to_int::work (int noutput_items,
   else {
     volk_32f_s32f_convert_32i_a(out, in, d_scale, d_vlen*noutput_items);
   }
+#else
+  const float *in = (const float *) input_items[0];
+  int *out = (int *) output_items[0];
+
+  gri_float_to_int (in, out, d_scale, d_vlen*noutput_items);
+
+#endif
   
   return noutput_items;
 }
-
-
-
diff --git a/gnuradio-core/src/lib/general/gri_float_to_int.cc b/gnuradio-core/src/lib/general/gri_float_to_int.cc
index 5271e60e2..0b0b10dfe 100644
--- a/gnuradio-core/src/lib/general/gri_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gri_float_to_int.cc
@@ -34,10 +34,10 @@ static const int64_t MIN_INT = -2147483647; // -(2^31)-1
 
 
 void 
-gri_float_to_int (const float *in, int *out, int nsamples)
+gri_float_to_int (const float *in, int *out, float scale, int nsamples)
 {
   for (int i = 0; i < nsamples; i++){
-    int64_t r = llrintf(in[i]);
+    int64_t r = llrintf(scale * in[i]);
     if (r < MIN_INT)
       r = MIN_INT;
     else if (r > MAX_INT)
diff --git a/gnuradio-core/src/lib/general/gri_float_to_int.h b/gnuradio-core/src/lib/general/gri_float_to_int.h
index a2f6ea877..d8b98efc1 100644
--- a/gnuradio-core/src/lib/general/gri_float_to_int.h
+++ b/gnuradio-core/src/lib/general/gri_float_to_int.h
@@ -28,6 +28,6 @@
 /*!
  * convert array of floats to int with rounding and saturation.
  */
-GR_CORE_API void gri_float_to_int (const float *in, int *out, int nsamples);
+GR_CORE_API void gri_float_to_int (const float *in, int *out, float scale, int nsamples);
 
 #endif /* INCLUDED_GRI_FLOAT_TO_INT_H */
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
index 559f90f05..977a8518d 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
@@ -33,9 +33,7 @@ class test_float_to_int (gr_unittest.TestCase):
     def test_001(self):
 
         src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
-        
-        ### Volk results
-        expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5]
+        expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -6]
 
         src = gr.vector_source_f(src_data)
         op = gr.float_to_int()
@@ -49,10 +47,10 @@ class test_float_to_int (gr_unittest.TestCase):
 
     def test_002(self):
 
-        src_data = (  2146400000,   2147483647,
-                     -2146400000,  -2147483648 )
-        expected_result = [  2146400000,   2146400000,
-                            -2146400000,  -2147483648 ]
+        src_data = ( 2147483647,  2147483648,  2200000000,
+                    -2147483648, -2147483649, -2200000000)
+        expected_result = [ 2147483647,  2147483647,  2147483647,
+                           -2147483647, -2147483647, -2147483647]
         src = gr.vector_source_f(src_data)
         op = gr.float_to_int()
         dst = gr.vector_sink_i()
@@ -69,7 +67,7 @@ class test_float_to_int (gr_unittest.TestCase):
         scale = 2
         vlen = 3
         src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
-        expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6,]
+        expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -7,]
         src = gr.vector_source_f(src_data)
         s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
         op = gr.float_to_int(vlen, scale)
-- 
cgit 


From 4589b6d6f062e92fd84965eaf47d3fc30bdf516e Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sat, 11 Feb 2012 12:27:45 -0500
Subject: volk: added some documentation to the Doxygen manual explaining Volk
 and how to use it.

---
 docs/doxygen/other/main_page.dox  |  10 +++
 docs/doxygen/other/volk_guide.dox | 161 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 docs/doxygen/other/volk_guide.dox

diff --git a/docs/doxygen/other/main_page.dox b/docs/doxygen/other/main_page.dox
index 0caa0b20f..68b098943 100644
--- a/docs/doxygen/other/main_page.dox
+++ b/docs/doxygen/other/main_page.dox
@@ -38,4 +38,14 @@ More details on packages in GNU Radio:
 \li \ref page_uhd
 \li \ref page_vocoder
 \li \ref page_pfb
+
+\section volk_main Using Volk in GNU Radio
+
+The \ref volk_guide page provides an overview of how to incorporate
+and use Volk in GNU Radio blocks.
+
+Many blocks have already been converted to use Volk in their calls, so
+they can also serve as examples. See the gr_complex_to_xxx.h file for
+examples of various blocks that make use of Volk.
+
 */
diff --git a/docs/doxygen/other/volk_guide.dox b/docs/doxygen/other/volk_guide.dox
new file mode 100644
index 000000000..d898f3864
--- /dev/null
+++ b/docs/doxygen/other/volk_guide.dox
@@ -0,0 +1,161 @@
+/*! \page volk_guide Instructions for using Volk in GNU Radio
+
+\section volk_intro Introduction
+
+Volk is the Vector-Optimized Library of Kernels. It is a library that
+contains kernels of hand-written SIMD code for different mathematical
+operations. Since each SIMD architecture can be greatly different and
+no compiler has yet come along to handle vectorization properly or
+highly efficiently, Volk approaches the problem differently. For each
+architecture or platform that a developer wishes to vectorize for, a
+new proto-kernel is added to Volk. At runtime, Volk will select the
+correct proto-kernel. In this way, the users of Volk call a kernel for
+performing the operation that is platform/architecture agnostic. This
+allows us to write portable SIMD code.
+
+Volk kernels are always defined with a 'generic' proto-kernel, which
+is written in plain C. With the generic kernel, the kernel becomes
+portable to any platform. Kernels are then extended by adding
+proto-kernels for new platforms in which they are desired.
+
+A good example of a Volk kernel with multiple proto-kernels defined is
+the volk_32f_s32f_multiply_32f_a. This kernel implements a scalar
+multiplication of a vector of floating point numbers (each item in the
+vector is multiplied by the same value). This kernel has the following
+proto-kernels that are defined for 'generic,' 'avx,' 'sse,' and 'orc.'
+
+\code 
+    void volk_32f_s32f_multiply_32f_a_generic
+    void volk_32f_s32f_multiply_32f_a_sse
+    void volk_32f_s32f_multiply_32f_a_avx
+    void volk_32f_s32f_multiply_32f_a_orc
+\endcode
+
+These proto-kernels mean that on platforms with AVX support, Volk can
+select this option or the SSE option, depending on which is faster. On
+other platforms, the ORC SIMD compiler might provide a solution. If
+all else fails, Volk can fall back on the generic proto-kernel, which
+will always work.
+
+Just a note on ORC. ORC is a SIMD compiler library that uses a generic
+assembly-like language for SIMD commands. Based on the available SIMD
+architecture of a system, it will try and compile a good
+solution. Tests show that the results of ORC proto-kernels are
+generally better than the generic versions but often not as good as
+the hand-tuned proto-kernels for a specific SIMD architecture. This
+is, of course, to be expected, and ORC provides a nice intermediary
+step to performance improvements until a specific hand-tuned
+proto-kernel can be made for a given platform.
+
+See <a
+href="http://gnuradio.org/redmine/projects/gnuradio/wiki/Volk">Volk on
+gnuradio.org</a> for details on the Volk naming scheme.
+
+
+\section volk_alignment Setting and Using Memory Alignment Information
+
+For Volk to work as well as possible, we want to use memory-aligned
+SIMD calls, which means we have to have some way of knowing and
+controlling the alignment of the buffers passed to gr_block's work
+function. We set the alignment requirement for SIMD aligned memory
+calls with:
+
+\code
+  const int alignment_multiple =
+    volk_get_alignment() / output_item_size;
+  set_alignment(alignment_multiple);
+\endcode
+
+The Volk function 'volk_get_alignment' provides the alignment of
+the machine architecture. We then base the alignment on the number of
+output items required to maintain the alignment, so we divide the
+number of alignment bytes by the number of bytes in an output item
+(sizeof(float), sizeof(gr_complex), etc.). This value is then set per
+block with the 'set_alignment' function.
+
+Because the scheduler tries to optimize throughput, the number of
+items available per call to work will change and depends on the
+availability of the read and write buffers. This means that it
+sometimes cannot produce a buffer that is properly memory
+aligned. This is an inevitable consequence of the scheduler
+system. Instead of requiring alignment, the scheduler enforces the
+alignment as much as possible, and when a buffer becomes unaligned,
+the scheduler will work to correct it as much as possible. If a
+block's buffers are unaligned, then, the scheduler sets a flag to
+indicate as much so that the block can then decide what best to
+do. The next section discusses the use of the aligned/unaligned
+information in a gr_block's work function.
+
+
+\section volk_work Using Alignment Properties in Work()
+
+The buffers passed to work/general_work in a gr_block are not
+guaranteed to be aligned, but they will mostly be aligned whenever
+possible. When not aligned, the 'is_unaligned()' flag will be set. So
+a block can know if its buffers are aligned and make the right
+decisions. This looks like:
+
+\code
+int
+gr_some_block::work (int noutput_items,
+		     gr_vector_const_void_star &input_items,
+		     gr_vector_void_star &output_items)
+{
+  const float *in = (const float *) input_items[0];
+  float *out = (float *) output_items[0];
+
+  if(is_unaligned()) {
+    // do something with unaligned data. This can either be a manual
+    // handling of the items or a call to an unaligned Volk function.
+    volk_32f_something_32f_u(out, in, noutput_items);
+  }
+  else {
+    // Buffers are aligned; can call the aligned Volk function.
+    volk_32f_something_32f_a(out, in, noutput_items);
+  }
+
+  return noutput_items;
+}
+\endcode
+
+
+
+\section volk_tuning Tuning Volk Performance
+
+VOLK comes with a profiler that will build a config file for the best
+SIMD architecture for your processor. Run volk_profile that is
+installed into $PREFIX/bin. This program tests all known VOLK kernels
+for each architecture supported by the processor. When finished, it
+will write to $HOME/.volk/volk_config the best architecture for each
+VOLK function. This file is read when using a function to know the
+best version of the function to execute.
+
+\subsection volk_hand_tuning Hand-Tuning Performance
+
+If you know a particular architecture works best for your processor,
+you can specify the particular architecture to use in the VOLK
+preferences file: $HOME/.volk/volk_config
+
+The file looks like:
+
+\code
+    volk_<FUNCTION_NAME> <ARCHITECTURE>
+\endcode
+
+Where "FUNCTION_NAME" is the particular function whose default you
+want to override and "ARCHITECTURE" is the VOLK SIMD
+architecture to use (generic, sse, sse2, sse3, avx, etc.). For
+example, the following config file tells VOLK to use SSE3 for the
+aligned and unaligned versions of a function that multiplies two
+complex streams together.
+
+\code
+    volk_32fc_x2_multiply_32fc_a sse3
+    volk_32fc_x2_multiply_32fc_u sse3
+\endcode
+
+\b Tip: if benchmarking GNU Radio blocks, it can be useful to have a
+volk_config file that sets all architectures to 'generic' as a way to
+test the vectorized versus non-vectorized implementations.
+
+*/
-- 
cgit 


From ca8889bc5d83bf380832431ebb30c88ddef5a924 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 13 Feb 2012 14:47:42 -0500
Subject: volk: better handling of plot for error bars. Older versions of pylab
 don't like the kwargs.

---
 gnuradio-examples/python/volk_benchmark/volk_plot.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py
index d7578c5a7..562d4f2f7 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_plot.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py
@@ -103,14 +103,16 @@ def main():
                 ydata.append(table_data[t][name]['avg'])
 
         if(args.errorbars is False):
-            stds = None
-
-        s0.bar(x1, ydata, width=wdth,
-               yerr=stds,
-               color=colors[i%M], label=t,
-               edgecolor='k', linewidth=2,
-               error_kw={"ecolor": 'k', "capsize":5,
-                         "linewidth":2})
+            s0.bar(x1, ydata, width=wdth,
+                   color=colors[i%M], label=t,
+                   edgecolor='k', linewidth=2)
+        else:
+            s0.bar(x1, ydata, width=wdth,
+                   yerr=stds,
+                   color=colors[i%M], label=t,
+                   edgecolor='k', linewidth=2,
+                   error_kw={"ecolor": 'k', "capsize":5,
+                             "linewidth":2})
 
     s0.legend()
     s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']),
-- 
cgit 


From 2597fe8ed82ee04c161c9f08534ae1b90d2b7d88 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 13 Feb 2012 14:50:14 -0500
Subject: core: change alignment requirement.

---
 gnuradio-core/src/lib/general/gr_char_to_short.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc
index 13375412c..8b6cd0be1 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc
@@ -41,7 +41,7 @@ gr_char_to_short::gr_char_to_short (size_t vlen)
     d_vlen(vlen)
 {
   const int alignment_multiple =
-    volk_get_alignment() / sizeof(short);
+    volk_get_alignment() / sizeof(char);
   set_alignment(alignment_multiple);
 }
 
-- 
cgit 


From e36d6f1f766e702d147ca494e21131cc66f157dd Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 13 Feb 2012 16:10:14 -0500
Subject: volk: can specify a table to calculate the percent improvement
 against instead of just the raw numbers.

---
 .../python/volk_benchmark/volk_plot.py             | 80 ++++++++++++++++------
 1 file changed, 60 insertions(+), 20 deletions(-)

diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py
index 562d4f2f7..823dfbf64 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_plot.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py
@@ -25,11 +25,15 @@ def main():
                         choices=['mean', 'min', 'max'],
                         default='mean',
                         help='Set the type of plot to produce [default: %(default)s]')
+    parser.add_argument('-%', '--percent', type=str,
+                        default=None, metavar="table",
+                        help='Show percent difference to the given type [default: %(default)s]')
     args = parser.parse_args()
     
     # Set up global plotting properties
     matplotlib.rcParams['figure.subplot.bottom'] = 0.2
     matplotlib.rcParams['figure.subplot.top'] = 0.95
+    matplotlib.rcParams['figure.subplot.right'] = 0.98
     matplotlib.rcParams['ytick.labelsize'] = 16
     matplotlib.rcParams['xtick.labelsize'] = 16
     matplotlib.rcParams['legend.fontsize'] = 18
@@ -39,9 +43,6 @@ def main():
     tables = list_tables(conn)
     M = len(tables)
 
-    # width of bars depends on number of comparisons
-    wdth = 0.80/M
-
     # Colors to distinguish each table in the bar graph
     # More than 5 tables will wrap around to the start.
     colors = ['b', 'r', 'g', 'm', 'k']
@@ -85,12 +86,23 @@ def main():
 
         table_data[table[0]] = data
 
+    if args.percent is not None:
+        for i,t in enumerate(table_data):
+            if args.percent == t:
+                norm_data = []
+                for name in name_reg:
+                    if(args.plot == 'max'):
+                        norm_data.append(table_data[t][name]['max'])
+                    elif(args.plot == 'min'):
+                        norm_data.append(table_data[t][name]['min'])
+                    elif(args.plot == 'mean'):
+                        norm_data.append(table_data[t][name]['avg'])
+        
+
     # Plot the results
     x0 = xrange(len(name_reg))
-    for i,t in enumerate(table_data):
-        # makes x values for this data set placement
-        x1 = [x + i*wdth for x in x0]
-
+    i = 0
+    for t in (table_data):
         ydata = []
         stds = []
         for name in name_reg:
@@ -99,24 +111,52 @@ def main():
                 ydata.append(table_data[t][name]['max'])
             elif(args.plot == 'min'):
                 ydata.append(table_data[t][name]['min'])
-            if(args.plot == 'mean'):
+            elif(args.plot == 'mean'):
                 ydata.append(table_data[t][name]['avg'])
 
-        if(args.errorbars is False):
-            s0.bar(x1, ydata, width=wdth,
-                   color=colors[i%M], label=t,
-                   edgecolor='k', linewidth=2)
+        if args.percent is not None:
+            ydata = [-100*(y-n)/y for y,n in zip(ydata,norm_data)]
+            if(args.percent != t):
+                # makes x values for this data set placement
+                # width of bars depends on number of comparisons
+                wdth = 0.80/(M-1)
+                x1 = [x + i*wdth for x in x0]
+                i += 1
+
+                s0.bar(x1, ydata, width=wdth,
+                       color=colors[(i-1)%M], label=t,
+                       edgecolor='k', linewidth=2)
+
         else:
-            s0.bar(x1, ydata, width=wdth,
-                   yerr=stds,
-                   color=colors[i%M], label=t,
-                   edgecolor='k', linewidth=2,
-                   error_kw={"ecolor": 'k', "capsize":5,
-                             "linewidth":2})
+            # makes x values for this data set placement
+            # width of bars depends on number of comparisons
+            wdth = 0.80/M
+            x1 = [x + i*wdth for x in x0]
+            i += 1
+
+            if(args.errorbars is False):
+                s0.bar(x1, ydata, width=wdth,
+                       color=colors[(i-1)%M], label=t,
+                       edgecolor='k', linewidth=2)
+            else:
+                s0.bar(x1, ydata, width=wdth,
+                       yerr=stds,
+                       color=colors[i%M], label=t,
+                       edgecolor='k', linewidth=2,
+                       error_kw={"ecolor": 'k', "capsize":5,
+                                 "linewidth":2})
+
+    nitems = res[0]['nitems']
+    if args.percent is None:
+        s0.set_ylabel("Processing time (sec) [{0:G} items]".format(nitems),
+                      fontsize=22, fontweight='bold',
+                      horizontalalignment='center')
+    else:
+        s0.set_ylabel("% Improvement over {0} [{1:G} items]".format(
+                args.percent, nitems),
+                      fontsize=22, fontweight='bold')
 
     s0.legend()
-    s0.set_ylabel("Processing time (sec) [{0:G} items]".format(res[0]['nitems']),
-                  fontsize=22, fontweight='bold')
     s0.set_xticks(x0)
     s0.set_xticklabels(name_reg)
     for label in s0.xaxis.get_ticklabels():
-- 
cgit 


From ef1748e4efc40cc065fb5f1b40d710256dd37efa Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Mon, 13 Feb 2012 20:53:51 -0500
Subject: volk: complex_to_arg doesn't actually use Volk. No need to benchmark
 it.

---
 gnuradio-examples/python/volk_benchmark/volk_types.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py
index 893318ddd..3bc5a22ae 100755
--- a/gnuradio-examples/python/volk_benchmark/volk_types.py
+++ b/gnuradio-examples/python/volk_benchmark/volk_types.py
@@ -97,12 +97,6 @@ def complex_to_mag_squared(N):
 
 ######################################################################
 
-def complex_to_arg(N):
-    op = gr.complex_to_arg()
-    tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
-    return tb
-
-######################################################################
 
 def run_tests(func, N, iters):
     print("Running Test: {0}".format(func.__name__))
@@ -128,8 +122,7 @@ def main():
                    complex_to_real,
                    complex_to_imag,
                    complex_to_mag,
-                   complex_to_mag_squared,
-                   complex_to_arg]
+                   complex_to_mag_squared]
 
     desc='Time an operation to compare with other implementations. \
           This program runs a simple GNU Radio flowgraph to test a \
-- 
cgit 


From ba3f1a4e8d5879c91eb5c47cc7e7c3ac73b1989d Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 14 Feb 2012 17:20:11 -0500
Subject: volk: added README file to explain how to run the benchmark tests and
 plotting tool.

---
 gnuradio-examples/python/volk_benchmark/README | 252 +++++++++++++++++++++++++
 1 file changed, 252 insertions(+)
 create mode 100644 gnuradio-examples/python/volk_benchmark/README

diff --git a/gnuradio-examples/python/volk_benchmark/README b/gnuradio-examples/python/volk_benchmark/README
new file mode 100644
index 000000000..516fc15bd
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/README
@@ -0,0 +1,252 @@
+VOLK Benchmarking Scripts
+
+The Python programs in this directory are designed to help benchmark
+and compare Volk enhancements to GNU Radio. There are two kinds of
+scripts here: collecting data and displaying the data.
+
+Data collection is done by running a Volk testing script that will
+populate a SQLite database file (volk_results.db by default). The
+plotting utility provided here reads from the database files and plots
+bar graphs to compare the different installations.
+
+These benchmarks can be used to compare previous versions of GNU
+Radio to using Volk; they can be used to compare different Volk
+proto-kernels, as well, by editing the volk_config file; or they could
+be used to compare performance between different machines and/or
+processors.
+
+
+======================================================================
+Volk Profiling
+
+Before doing any kind of Volk benchmarking, it is important to run the
+volk_profile program. The profiler will build a config file for the
+best SIMD architecture for your processor. Run volk_profile that is
+installed into $PREFIX/bin. This program tests all known Volk kernels
+for each proto-kernel supported by the processor. When finished, it
+will write to $HOME/.volk/volk_config the best architecture for each
+Volk function. This file is read when a function is called to select
+the best version of that function to execute.
+
+The volk_config file contains a line for each kernel, where each line
+looks like:
+
+    volk_<KERNEL_NAME> <ARCHITECTURE>
+
+The architecture will be something like (sse, sse2, sse3, avx, neon,
+etc.), depending on your processor.
+
+
+======================================================================
+Benchmark Tests
+
+There are currently two benchmark scripts defined for collecting
+data. There is one that runs through the type conversions that have
+been converted to Volk (volk_types.py) and the other runs through the
+math operators converted to using Volk (volk_math.py).
+
+Script prototypes
+Both have the same structure for use:
+
+----------------------------------------------------------------------
+./volk_<test>.py [-h] -L LABEL [-D DATABASE] [-N NITEMS] [-I ITERATIONS]
+                    [--tests [{0,1,2,3} [{0,1,2,3} ...]]] [--list]
+                    [--all]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -L LABEL, --label LABEL
+                        Label of database table [default: None]
+  -D DATABASE, --database DATABASE
+                        Database file to store data in [default:
+                        volk_results.db]
+  -N NITEMS, --nitems NITEMS
+                        Number of items per iterations [default: 1000000000.0]
+  -I ITERATIONS, --iterations ITERATIONS
+                        Number of iterations [default: 20]
+  --tests [{0,1,2,3} [{0,1,2,3} ...]]
+                        A list of tests to run; can be a single test or a
+                        space-separated list.
+  --list                List the available tests
+  --all                 Run all tests
+----------------------------------------------------------------------
+
+To run, you specify the tests to run and a label to store along with
+the results. To find out what the available tests are, use the
+'--list' option.
+
+To specify a subset of tests, use the '--tests' option with a
+space-separated list of test numbers (e.g., --tests 0 2 4 9).
+
+Use the '--all' option to run all tests.
+
+The label specified is used as an identifier for the benchmarking
+currently being done. This is required as it is important in
+organizing the data in the database (each label is its own
+table). Usually, the label will specify the type of run being done,
+such as "volk_aligned" or "v3_5_1". In these cases, the "volk_aligned"
+label says that this is for a benchmarking using the GNU Radio version
+that uses the aligned scheduler and Volk calls in the work
+functions. The "v3_5_1" label is if you were benchmarking an installed
+version 3.5.1 of GNU Radio, which is pre-Volk. These will then be
+plotted against each other to see the timing differences.
+
+The 'database' option will output the results to a new database
+file. This can be useful for separating the output of different runs
+or of different benchmarks, such as the types versus the math scripts,
+say, or to distinguish results from different computers.
+
+If rerun using the same database and label, the entries in the table
+will simply be replaced by the new results.
+
+It is often useful to use the 'sqlitebrowser' program to interrogate
+the database file further, if you are interested in the structure or
+the raw data.
+
+Other parameters of this script set the number of items to process and
+number of iterations to use when computing the benchmarking
+data. These default to 1 billion samples per iteration over 20
+iterations. Expect a default run to take a long time. The '-N' and
+'-I' options can be used to change the runtime of the benchmarks; the
+defaults are set high to reduce the variance between iterations.
+
+======================================================================
+Plotting Results
+
+The volk_plot.py script reads a given database file and plots the
+results. The default behavior is to read all of the labels stored in
+the database and plot them as data sets on a bar graph. This shows the
+average time taken to process the number of items given.
+
+The options for the plotting script are:
+
+usage: volk_plot.py [-h] [-D DATABASE] [-E] [-P {mean,min,max}] [-% table]
+
+Plot Volk performance results from a SQLite database. Run one of the volk
+tests first (e.g, volk_math.py)
+
+----------------------------------------------------------------------
+optional arguments:
+  -h, --help            show this help message and exit
+  -D DATABASE, --database DATABASE
+                        Database file to read data from [default:
+                        volk_results.db]
+  -E, --errorbars       Show error bars (1 standard dev.)
+  -P {mean,min,max}, --plot {mean,min,max}
+                        Set the type of plot to produce [default: mean]
+  -% table, --percent table
+                        Show percent difference to the given type [default:
+                        None]
+----------------------------------------------------------------------
+
+This script allows you to specify the database used (-D), but will
+always read all rows from all tables from it and display them. You can
+also turn on plotting error bars (1 standard deviation from the mean). Be
+careful, though, as some older versions of Matplotlib might have an
+issue with this option.
+
+The mean time is only one possible statistic that we might be
+interested in when looking at the data. It represents the average user
+experience when running a given block. On the other hand, the minimum
+runtime best represents the actual performance of a block given
+minimal OS interruptions while running. Right now, the data collected
+includes the mean, variance, min, and max over the number of
+iterations given. Using the '-P' option, you can specify the type of
+data to plot (mean, min, or max).
+
+Another useful way of looking at the data is to compare the percent
+improvement of a benchmark compared to another. This is done using the
+'-%' option with the provided table (or label) as the baseline. So if
+we were interested in how much faster 'volk_aligned' was than
+'v3_5_1', we would specify '-% v3_5_1' to see this. The plot would
+then only show the percent speedup observed using Volk for each of the
+blocks.
+
+
+======================================================================
+Benchmarking Walkthrough
+
+This will walk through an example of benchmarking the new Volk
+implementation versus the pre-Volk GNU Radio. It also shows how to
+look at the SIMD optimized versions versus the generic
+implementations.
+
+Since we introduced Volk in GNU Radio 3.5.2, we will use the following
+labels for our data:
+
+   1.) volk_aligned: v3.5.2 with volk_profile results in .volk/volk_config
+   2.) v3_5_2: v3.5.2 with the generic (non-SIMD) calls to Volk
+   3.) v3_5_1: an installation of GNU Radio from version v3.5.1
+
+We assume that we have installed two versions of GNU Radio.
+
+   v3.5.2 installed into /opt/gr-3_5_2
+   v3.5.1 installed into /opt/gr-3_5_1
+
+To test cases 1 and 2 above, we have to run GNU Radio from the v3.5.2
+installation, so we set the following environmental variables. Note
+that this is written for Ubuntu 11.10. These commands and directories
+may have to be changed depending on your OS and versions.
+
+    export LD_LIBRARY_PATH=/opt/gr-3_5_2/lib
+    export PYTHONPATH=/opt/gr-3_5_2/lib/python2.7/dist-packages
+
+Now we can run the benchmark tests, so we will focus on the math
+operators:
+
+    ./volk_math.py -D volk_results_math.db --all -L volk_aligned
+
+When this finishes, the 'volk_results_math.db' will contain our
+results for this run.
+
+We next want to run the generic, non-SIMD, calls. This can be done by
+changing the Volk kernel settings in $HOME/.volk/volk_config. First,
+make a backup of this file. Then edit it and change all architecture
+calls (sse, sse2, etc.) to 'generic'. Now, Volk will only call the
+generic versions of these functions. So we rerun the benchmark with:
+
+    ./volk_math.py -D volk_results_math.db --all -L v3_5_2
+
+Notice that the only thing changed here was the label to 'v3_5_2'.
+
+Next, we want to collect data for the non-Volk version of GNU
+Radio. This is important because some internal changes were made to
+GNU Radio when adding support for Volk, so it is useful to know how
+those changes affect performance. First, we set the environmental
+variables to point to the v3.5.1 installation:
+
+    export LD_LIBRARY_PATH=/opt/gr-3_5_1/lib
+    export PYTHONPATH=/opt/gr-3_5_1/lib/python2.7/dist-packages
+
+And when we run the test, we use the same command line, but the GNU
+Radio libraries and Python files used come from v3.5.1. We also change
+the label to indicate the different version to store.
+    
+    ./volk_math.py -D volk_results_math.db --all -L v3_5_1
+
+We now have a database populated with three tables for the three
+different labels. We can plot them all together by simply running:
+
+    ./volk_plot.py -D volk_results_math.db
+
+This will show the average run times for each of the three
+configurations for all math functions tested. We might also be
+interested to see the difference in performance from the v3.5.1
+version, so we can run:
+
+    ./volk_plot.py -D volk_results_math.db -% v3_5_1
+
+That will plot both the 'volk_aligned' and 'v3_5_2' as a percentage
+improvement over v3_5_1. A positive value indicates that this version
+runs faster than the v3.5.1 version.
+
+
+----------------------------------------------------------------------
+
+Another interesting test case could be to compare results on different
+processors. So if you have different generation Intels, AMD, or
+whatever, you can simply pass the .db file around and run the Volk
+benchmark script to populate the database with different results. For
+this, you would specify a label like '-L i7_2620M' that indicates the
+processor type to uniquely ID the data.
+
-- 
cgit 


From 2eaa0a6e1e57cfc374c258c317ecb469fc49bf53 Mon Sep 17 00:00:00 2001
From: Johnathan Corgan
Date: Tue, 14 Feb 2012 15:37:57 -0800
Subject: build: fix autotools for gnuradio-core volkification

---
 config/grc_volk.m4                              | 7 +++----
 gnuradio-core/src/lib/Makefile.am               | 1 +
 gnuradio-core/src/lib/general/Makefile.am       | 8 ++++----
 gnuradio-core/src/lib/gengen/Makefile.gen       | 9 ---------
 gnuradio-core/src/lib/gengen/generate_common.py | 2 +-
 grc/blocks/Makefile.am                          | 2 --
 volk/include/volk/Makefile.am                   | 4 ++--
 7 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/config/grc_volk.m4 b/config/grc_volk.m4
index f349d5e99..cc6d75649 100644
--- a/config/grc_volk.m4
+++ b/config/grc_volk.m4
@@ -26,11 +26,10 @@ AC_DEFUN([GRC_VOLK],[
     dnl Test if $enable_volk is:
     dnl   yes  : if the --enable code passed muster and all dependencies are met
     dnl   no   : otherwise, then do not set variables
-    if test $passed != with && test x$enable_volk == xyes; then
+    if test $passed != with && test x$enable_volk != xno; then
     	dnl how and where to find INCLUDES and LA
-	volk_INCLUDES="-I\${abs_top_srcdir}/volk/include"
-	volk_LA="\${abs_top_builddir}/volk/lib/libvolk.la \
-	         \${abs_top_builddir}/volk/lib/libvolk_runtime.la"
+	volk_INCLUDES="-I\${abs_top_srcdir}/volk/gen/include -I\${abs_top_srcdir}/volk/include"
+	volk_LA="\${abs_top_builddir}/volk/lib/libvolk.la"
     fi
 
     dnl volk uses a subsidiary configure.ac
diff --git a/gnuradio-core/src/lib/Makefile.am b/gnuradio-core/src/lib/Makefile.am
index fc1b7917b..21e721073 100644
--- a/gnuradio-core/src/lib/Makefile.am
+++ b/gnuradio-core/src/lib/Makefile.am
@@ -51,6 +51,7 @@ libgnuradio_core_la_LIBADD  = 		\
 	runtime/libruntime.la		\
 	hier/libhier.la			\
 	$(GRUEL_LA)			\
+	$(VOLK_LA)			\
 	$(FFTW3F_LIBS)			\
 	$(GSL_LIBS)			\
 	$(CBLAS_LIBS)			\
diff --git a/gnuradio-core/src/lib/general/Makefile.am b/gnuradio-core/src/lib/general/Makefile.am
index ea3b31fd3..5b4a702e1 100644
--- a/gnuradio-core/src/lib/general/Makefile.am
+++ b/gnuradio-core/src/lib/general/Makefile.am
@@ -129,8 +129,8 @@ libgeneral_la_SOURCES = 		\
 	gr_reverse.cc			\
 	gr_rms_cf.cc			\
 	gr_rms_ff.cc			\
+	gr_short_to_char.cc		\
 	gr_short_to_float.cc		\
-	gr_short_to_int.cc		\
 	gr_int_to_float.cc		\
 	gr_simple_correlator.cc		\
 	gr_simple_framer.cc		\
@@ -194,8 +194,8 @@ libgeneral_qa_la_SOURCES = 		\
 grinclude_HEADERS = 			\
 	gr_core_api.h			\
 	complex_vec_test.h		\
-	gr_add.h			\
 	gr_additive_scrambler_bb.h	\
+	gr_add_ff.h			\
 	gr_agc_cc.h                 	\
 	gr_agc_ff.h                 	\
 	gr_agc2_cc.h                	\
@@ -339,7 +339,7 @@ grinclude_HEADERS = 			\
 	gri_int_to_float.h		\
 	gri_lfsr_15_1_0.h		\
 	gri_lfsr_32k.h			\
-	gri_short_to_char.h		\
+	gri_short_to_float.h		\
 	gri_uchar_to_float.h		\
 	malloc16.h			\
 	random.h			\
@@ -368,8 +368,8 @@ noinst_HEADERS = 			\
 swiginclude_HEADERS =			\
 	complex_vec_test.i		\
 	general.i			\
-	gr_add.i			\
 	gr_additive_scrambler_bb.i	\
+	gr_add_ff.i			\
 	gr_agc_cc.i                 	\
 	gr_agc_ff.i                 	\
 	gr_agc2_cc.i                 	\
diff --git a/gnuradio-core/src/lib/gengen/Makefile.gen b/gnuradio-core/src/lib/gengen/Makefile.gen
index fb7b21e24..db260585f 100644
--- a/gnuradio-core/src/lib/gengen/Makefile.gen
+++ b/gnuradio-core/src/lib/gengen/Makefile.gen
@@ -12,7 +12,6 @@ GENERATED_H = \
 	gr_add_const_vff.h \
 	gr_add_const_vii.h \
 	gr_add_const_vss.h \
-	gr_add_ff.h \
 	gr_add_ii.h \
 	gr_add_ss.h \
 	gr_and_bb.h \
@@ -45,14 +44,12 @@ GENERATED_H = \
 	gr_moving_average_ff.h \
 	gr_moving_average_ii.h \
 	gr_moving_average_ss.h \
-	gr_multiply_const_ff.h \
 	gr_multiply_const_ii.h \
 	gr_multiply_const_ss.h \
 	gr_multiply_const_vcc.h \
 	gr_multiply_const_vff.h \
 	gr_multiply_const_vii.h \
 	gr_multiply_const_vss.h \
-	gr_multiply_ff.h \
 	gr_multiply_ii.h \
 	gr_multiply_ss.h \
 	gr_mute_cc.h \
@@ -115,7 +112,6 @@ GENERATED_I = \
 	gr_add_const_vff.i \
 	gr_add_const_vii.i \
 	gr_add_const_vss.i \
-	gr_add_ff.i \
 	gr_add_ii.i \
 	gr_add_ss.i \
 	gr_and_bb.i \
@@ -148,14 +144,12 @@ GENERATED_I = \
 	gr_moving_average_ff.i \
 	gr_moving_average_ii.i \
 	gr_moving_average_ss.i \
-	gr_multiply_const_ff.i \
 	gr_multiply_const_ii.i \
 	gr_multiply_const_ss.i \
 	gr_multiply_const_vcc.i \
 	gr_multiply_const_vff.i \
 	gr_multiply_const_vii.i \
 	gr_multiply_const_vss.i \
-	gr_multiply_ff.i \
 	gr_multiply_ii.i \
 	gr_multiply_ss.i \
 	gr_mute_cc.i \
@@ -218,7 +212,6 @@ GENERATED_CC = \
 	gr_add_const_vff.cc \
 	gr_add_const_vii.cc \
 	gr_add_const_vss.cc \
-	gr_add_ff.cc \
 	gr_add_ii.cc \
 	gr_add_ss.cc \
 	gr_and_bb.cc \
@@ -251,14 +244,12 @@ GENERATED_CC = \
 	gr_moving_average_ff.cc \
 	gr_moving_average_ii.cc \
 	gr_moving_average_ss.cc \
-	gr_multiply_const_ff.cc \
 	gr_multiply_const_ii.cc \
 	gr_multiply_const_ss.cc \
 	gr_multiply_const_vcc.cc \
 	gr_multiply_const_vff.cc \
 	gr_multiply_const_vii.cc \
 	gr_multiply_const_vss.cc \
-	gr_multiply_ff.cc \
 	gr_multiply_ii.cc \
 	gr_multiply_ss.cc \
 	gr_mute_cc.cc \
diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py
index 616cc4b06..6da2044e0 100755
--- a/gnuradio-core/src/lib/gengen/generate_common.py
+++ b/gnuradio-core/src/lib/gengen/generate_common.py
@@ -66,7 +66,7 @@ others = (
     ('gr_peak_detector_XX',         ('fb','ib','sb')),
     ('gr_multiply_XX',              ('ss','ii')),
     ('gr_multiply_const_XX',        ('ss','ii')),
-    ('gr_add_XX',                   ('ss','cc'))
+    ('gr_add_XX',                   ('ss','cc','ii'))
     )
 
 
diff --git a/grc/blocks/Makefile.am b/grc/blocks/Makefile.am
index 738e79f24..104ba8062 100644
--- a/grc/blocks/Makefile.am
+++ b/grc/blocks/Makefile.am
@@ -28,8 +28,6 @@ dist_ourdata_DATA = \
 	band_reject_filter.xml \
 	blks2_am_demod_cf.xml \
 	blks2_analysis_filterbank.xml \
-	blks2_cvsd_encode.xml \
-	blks2_cvsd_decode.xml \
 	blks2_error_rate.xml \
 	blks2_fm_deemph.xml \
 	blks2_fm_demod_cf.xml \
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index f6b5835b1..a01ddf193 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -59,8 +59,8 @@ volkinclude_HEADERS = \
 	volk_32fc_32f_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_a.h \
 	volk_32fc_s32fc_multiply_32fc_u.h \
-	volk_32fc_s32fc_multiply_conjugate_32fc_a.h \
-	volk_32fc_s32fc_multiply_conjugate_32fc_u.h \
+	volk_32fc_x2_multiply_conjugate_32fc_a.h \
+	volk_32fc_x2_multiply_conjugate_32fc_u.h \
 	volk_32fc_s32f_power_32fc_a.h \
 	volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \
 	volk_32fc_s32f_atan2_32f_a.h \
-- 
cgit 


From fa8ab7cb146287a9f0d8db67e3126ab4a867a201 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 21 Feb 2012 15:41:27 -0800
Subject: Volk: add scalar const support to the profiler/QA code. Disabled
 volk_32fc_s32fc_multiply_32fc_a's Orc impl due to it not working.

---
 volk/apps/volk_profile.cc                          |  4 +--
 .../include/volk/volk_32fc_s32fc_multiply_32fc_a.h |  6 ++--
 volk/lib/qa_utils.cc                               | 32 +++++++++++++++++++---
 volk/lib/qa_utils.h                                |  5 +++-
 4 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 10a699872..5ad7727aa 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -102,8 +102,8 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results);
     VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results);
     VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results);
-    VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results);
+    //VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, lv_32fc_t(1.0, 0.5), 204600, 1000, &results);
+    VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 1.0, 204600, 10000, &results);
 
     char path[256];
     get_config_path(path);
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
index b27a7259f..75cf8c8b2 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
@@ -34,9 +34,9 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, c
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
 extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points);
-static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-    volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points);
-}
+//static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+//    volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points);
+//}
 #endif /* LV_HAVE_ORC */
 
 
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 9bb515e9f..bb37801c9 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -198,6 +198,18 @@ inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buf
     while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
 }
 
+inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
+}
+
 template <class t>
 bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
     bool fail = false;
@@ -246,7 +258,7 @@ bool run_volk_tests(struct volk_func_desc desc,
                     void (*manual_func)(),
                     std::string name,
                     float tol,
-                    float scalar,
+                    lv_32fc_t scalar,
                     int vlen,
                     int iter,
                     std::vector<std::string> *best_arch_vector = 0
@@ -316,21 +328,33 @@ bool run_volk_tests(struct volk_func_desc desc,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                    if(inputsc[0].is_complex) {
+                        run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                    } else {
+                        run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
+                    }
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                    if(inputsc[0].is_complex) {
+                        run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                    } else {
+                        run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
+                    }
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
                     run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                    if(inputsc[0].is_complex) {
+                        run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                    } else {
+                        run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
+                    }
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index a1bc1f20c..b998df852 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -21,7 +21,7 @@ volk_type_t volk_type_from_string(std::string);
 float uniform(void);
 void random_floats(float *buf, unsigned n);
 
-bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *);
+bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *);
 
 #define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); }
 #define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results)
@@ -32,5 +32,8 @@ typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const
 typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
 typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
 typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*);
+typedef void (*volk_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const char*); //one input vector, one scalar float input
+typedef void (*volk_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*);
+typedef void (*volk_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*);
 
 #endif //VOLK_QA_UTILS_H
-- 
cgit 


From 330cddf31208b843c0997c6fb05cb3facf31f536 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 22 Feb 2012 08:46:55 -0800
Subject: Remove ORC invocation since // doesn't dissuade the generator.

---
 volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
index 75cf8c8b2..665fad47a 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
@@ -25,20 +25,6 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, c
 }
 #endif /* LV_HAVE_GENERIC */
 
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-  */
-extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points);
-//static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-//    volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points);
-//}
-#endif /* LV_HAVE_ORC */
-
 
 
-- 
cgit 


From e8d644872837f4cbfc05851710531b2ac5259806 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 23 Feb 2012 14:28:21 -0500
Subject: volk: float to short conversion is consistent between archs and tail
 cases. Rounds to nearest number.

---
 gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py |  7 ++-----
 volk/include/volk/volk_32f_s32f_convert_16i_a.h           | 15 ++++++++-------
 volk/include/volk/volk_32f_s32f_convert_16i_u.h           | 15 ++++++++-------
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
index 926f1c08b..0d89a149c 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
@@ -34,10 +34,7 @@ class test_float_to_short (gr_unittest.TestCase):
     def test_001(self):
 
         src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
-        expected_result = [int(round(s)) for s in src_data]
-
-        ### Volk results
-        expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -5]
+        expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -6]
 
         src = gr.vector_source_f(src_data)
         op = gr.float_to_short()
@@ -71,7 +68,7 @@ class test_float_to_short (gr_unittest.TestCase):
         scale = 2
         vlen = 3
         src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
-        expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -6]
+        expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -7]
         src = gr.vector_source_f(src_data)
         s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
         op = gr.float_to_short(vlen, scale)
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
index 10c921b08..a24959678 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
@@ -4,6 +4,7 @@
 #include <volk/volk_common.h>
 #include <inttypes.h>
 #include <stdio.h>
+#include <math.h>
 
 #ifdef LV_HAVE_SSE2
 #include <emmintrin.h>
@@ -57,7 +58,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
       r = max_val;
     else if(r < min_val)
       r = min_val;
-    outputVector[number] = (int16_t)(r);
+    outputVector[number] = (int16_t)rintf(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -98,10 +99,10 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
   }
 
   number = quarterPoints * 4;    
@@ -111,7 +112,7 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
       r = max_val;
     else if(r < min_val)
       r = min_val;
-    outputVector[number] = (int16_t)(r);
+    outputVector[number] = (int16_t)rintf(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -138,7 +139,7 @@ static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, co
       r = min_val;
     else if(r > max_val)
       r = max_val;
-    *outputVectorPtr++ = (int16_t)(r);
+    *outputVectorPtr++ = (int16_t)rintf(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
index f339a7d10..f58158041 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
@@ -3,6 +3,7 @@
 
 #include <inttypes.h>
 #include <stdio.h>
+#include <math.h>
 
 #ifdef LV_HAVE_SSE2
 #include <emmintrin.h>
@@ -57,7 +58,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
       r = max_val;
     else if(r < min_val)
       r = min_val;
-    outputVector[number] = (int16_t)(r);
+    outputVector[number] = (int16_t)rintf(r);
   }
 }
 #endif /* LV_HAVE_SSE2 */
@@ -99,10 +100,10 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
 
     _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
+    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
   }
 
   number = quarterPoints * 4;    
@@ -112,7 +113,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
       r = max_val;
     else if(r < min_val)
       r = min_val;
-    outputVector[number] = (int16_t)(r);
+    outputVector[number] = (int16_t)rintf(r);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -140,7 +141,7 @@ static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, co
       r = max_val;
     else if(r < min_val)
       r = min_val;
-    *outputVectorPtr++ = (int16_t)(r);
+    *outputVectorPtr++ = (int16_t)rintf(r);
   }
 }
 #endif /* LV_HAVE_GENERIC */
-- 
cgit