summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nick Foster, 2011-05-12 16:17:46 -0700
committer: Nick Foster, 2011-05-12 16:17:46 -0700
commit: 88d41e8bc991476880d40d2369018e9adf0011ff (patch)
tree: d82cd8850a5539e4d10394af46df65bee7d26dd9
parent: a346b0d610e2952e9ffc45363425e4f2becea109 (diff)
downloadgnuradio-88d41e8bc991476880d40d2369018e9adf0011ff.tar.gz
gnuradio-88d41e8bc991476880d40d2369018e9adf0011ff.tar.bz2
gnuradio-88d41e8bc991476880d40d2369018e9adf0011ff.zip
Volk: added an AVX impl (of 32f multiply) just to see if it's any faster. It's not.
-rw-r--r-- volk/include/volk/volk_32f_x2_multiply_32f_a16.h 39
1 files changed, 39 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h
index cef17f5a6..885941abf 100644
--- a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h
@@ -43,6 +43,45 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float*
}
#endif /* LV_HAVE_SSE */
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Multiplies the two input vectors element-wise and stores the results in the third vector
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be multiplied
+  \param bVector One of the vectors to be multiplied
+  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_a16_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+  unsigned int number = 0;
+  const unsigned int eighthPoints = num_points / 8;
+
+  float* cPtr = cVector;
+  const float* aPtr = aVector;
+  const float* bPtr = bVector;
+
+  __m256 aVal, bVal, cVal;
+  // Main loop: each 256-bit register holds 8 floats, so process 8 points per iteration.
+  for(;number < eighthPoints; number++){
+    // Unaligned load/store on purpose: the aligned 256-bit forms need 32-byte alignment.
+    // NOTE(review): the _a16 suffix presumably only promises 16-byte alignment -- confirm.
+    aVal = _mm256_loadu_ps(aPtr);
+    bVal = _mm256_loadu_ps(bPtr);
+
+    cVal = _mm256_mul_ps(aVal, bVal);
+    _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+    aPtr += 8;
+    bPtr += 8;
+    cPtr += 8;
+  }
+  // Scalar tail: multiply the remaining (num_points % 8) values one at a time.
+  number = eighthPoints * 8;
+  for(;number < num_points; number++){
+    *cPtr++ = (*aPtr++) * (*bPtr++);
+  }
+}
+#endif /* LV_HAVE_AVX */
+
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplys the two input vectors and store their results in the third vector