summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--volk/include/volk/volk_16sc_magnitude_32f_aligned16.h70
1 files changed, 70 insertions, 0 deletions
diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
index 8ba5737e8..9c2a48835 100644
--- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
+++ b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
@@ -70,6 +70,76 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector
}
#endif /* LV_HAVE_SSE3 */
+#if LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param scalar The data value to be divided against each input data value of the input complex vector
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+*/
+static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const int16_t* complexVectorPtr = (const int16_t*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ const float iScalar = 1.0 / scalar;
+ __m128 invScalar = _mm_set_ps1(iScalar);
+
+ __m128 cplxValue1, cplxValue2, result, re, im;
+
+ float inputFloatBuffer[8] __attribute__((aligned(128)));
+
+ for(;number < quarterPoints; number++){
+ inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
+ inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
+ inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
+ inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
+
+ inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
+ inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
+ inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
+ inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
+
+ cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
+ cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
+
+ re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
+ im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
+
+ complexVectorPtr += 8;
+
+ cplxValue1 = _mm_mul_ps(re, invScalar);
+ cplxValue2 = _mm_mul_ps(im, invScalar);
+
+ cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+ cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+ result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+ result = _mm_sqrt_ps(result); // Square root the values
+
+ _mm_store_ps(magnitudeVectorPtr, result);
+
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ magnitudeVectorPtr = &magnitudeVector[number];
+ complexVectorPtr = (const int16_t*)&complexVector[number];
+ for(; number < num_points; number++){
+ float val1Real = (float)(*complexVectorPtr++) * iScalar;
+ float val1Imag = (float)(*complexVectorPtr++) * iScalar;
+ *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
+ }
+}
+
+
+#endif /* LV_HAVE_SSE */
+
#if LV_HAVE_GENERIC
/*!
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector