core: moving multiply_const_ff from gengen to general to take advantage of volk.

Also adds SSE and AVX and unaligned Volk versions for this.
author: Tom Rondeau 2012-02-02 17:26:39 -0500
committer: Tom Rondeau 2012-02-13 14:56:34 -0500
commit: ae663decab658be25ac01072fa2f5c8454bd6167 (patch)
tree: c9bf58b3a8624764ed92f5c4bb9a8df6d54dc07b /volk
parent: a3b19015cb1c896aef19a7817458878337b3f5e3 (diff)
download: gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.tar.gz
gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.tar.bz2
gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.zip
3 files changed, 82 insertions, 6 deletions
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index c198ec42d..7da8651e9 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -110,6 +110,7 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
     VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 204600, 1000, &results);
 
     char path[256];
     get_config_path(path);
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
index 37223dc81..d1c6f3f65 100644
--- a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
@@ -4,6 +4,81 @@
 #include <inttypes.h>
 #include <stdio.h>
 
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+  \brief Multiplies every float in aVector by scalar, 4 values at a time using SSE
+  \param cVector The vector where the results will be stored (accessed with _mm_store_ps, so 16-byte aligned)
+  \param aVector The vector to be multiplied (accessed with _mm_load_ps, so 16-byte aligned)
+  \param scalar The scalar value each element of aVector is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int quarterPoints = num_points / 4; // Number of complete groups of 4 floats
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+
+    __m128 aVal, bVal, cVal;
+    bVal = _mm_set_ps1(scalar); // Copy scalar into all 4 lanes once, outside the loop
+    for(;number < quarterPoints; number++){
+      // Each iteration handles one group of 4 consecutive floats
+      aVal = _mm_load_ps(aPtr); // Aligned load of 4 input values
+      
+      cVal = _mm_mul_ps(aVal, bVal); 
+      
+      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 4;
+      cPtr += 4;
+    }
+
+    number = quarterPoints * 4; // Index of the first value not covered by the 4-wide loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * scalar; // Leftover (num_points % 4) values, one at a time
+    }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Multiplies every float in aVector by scalar, 8 values at a time using AVX
+  \param cVector The vector where the results will be stored (accessed with _mm256_store_ps, so 32-byte aligned)
+  \param aVector The vector to be multiplied (accessed with _mm256_load_ps, so 32-byte aligned)
+  \param scalar The scalar value each element of aVector is multiplied by
+  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+    unsigned int number = 0;
+    const unsigned int eighthPoints = num_points / 8; // Number of complete groups of 8 floats
+
+    float* cPtr = cVector;
+    const float* aPtr = aVector;
+
+    __m256 aVal, bVal, cVal;
+    bVal = _mm256_set1_ps(scalar); // Copy scalar into all 8 lanes once, outside the loop
+    for(;number < eighthPoints; number++){
+      // Each iteration handles one group of 8 consecutive floats
+      aVal = _mm256_load_ps(aPtr); // Aligned load of 8 input values
+      
+      cVal = _mm256_mul_ps(aVal, bVal); 
+      
+      _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
+
+      aPtr += 8;
+      cPtr += 8;
+    }
+
+    number = eighthPoints * 8; // Index of the first value not covered by the 8-wide loop
+    for(;number < num_points; number++){
+      *cPtr++ = (*aPtr++) * scalar; // Leftover (num_points % 8) values, one at a time
+    }
+}
+#endif /* LV_HAVE_AVX */
+
+
 #ifdef LV_HAVE_GENERIC
 /*!
   \brief Scalar float multiply
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
index a9dfcda19..450a89066 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -9,10 +9,10 @@
 #ifdef LV_HAVE_SSE3
 #include <pmmintrin.h>
   /*!
-    \brief Multiplies the two input complex vectors and stores their results in the third vector
+    \brief Multiplies the input vector by a complex scalar and stores the results in cVector
     \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
+    \param aVector The vector to be multiplied
+    \param scalar The complex scalar to multiply aVector
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
 static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
@@ -53,10 +53,10 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, cons
 
 #ifdef LV_HAVE_GENERIC
   /*!
-    \brief Multiplies the two input complex vectors and stores their results in the third vector
+    \brief Multiplies the input vector by a complex scalar and stores the results in cVector
     \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
+    \param aVector The vector to be multiplied
+    \param scalar The complex scalar to multiply aVector
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
 static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
author	Tom Rondeau	2012-02-02 17:26:39 -0500
committer	Tom Rondeau	2012-02-13 14:56:34 -0500
commit	ae663decab658be25ac01072fa2f5c8454bd6167 (patch)
tree	c9bf58b3a8624764ed92f5c4bb9a8df6d54dc07b /volk
parent	a3b19015cb1c896aef19a7817458878337b3f5e3 (diff)
download	gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.tar.gz gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.tar.bz2 gnuradio-ae663decab658be25ac01072fa2f5c8454bd6167.zip