diff options
-rw-r--r-- | volk/include/volk/volk_16sc_magnitude_32f_aligned16.h | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h index 8ba5737e8..9c2a48835 100644 --- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h @@ -70,6 +70,76 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector } #endif /* LV_HAVE_SSE3 */ +#if LV_HAVE_SSE +#include <xmmintrin.h> +/*! + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param scalar The value that each input component is divided by before the magnitude is computed + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector +*/ +static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; /* the SSE loop handles four complex values per pass */ + + const int16_t* complexVectorPtr = (const int16_t*)complexVector; + float* magnitudeVectorPtr = magnitudeVector; + + const float iScalar = 1.0 / scalar; /* reciprocal taken once so both loops multiply instead of divide */ + __m128 invScalar = _mm_set_ps1(iScalar); + + __m128 cplxValue1, cplxValue2, result, re, im; + + float inputFloatBuffer[8] __attribute__((aligned(128))); /* scratch holding the int16 components converted to float before the SSE loads */ + + for(;number < quarterPoints; number++){ + inputFloatBuffer[0] = (float)(complexVectorPtr[0]); + inputFloatBuffer[1] = (float)(complexVectorPtr[1]); + inputFloatBuffer[2] = (float)(complexVectorPtr[2]); + inputFloatBuffer[3] = (float)(complexVectorPtr[3]); + + inputFloatBuffer[4] = (float)(complexVectorPtr[4]); + inputFloatBuffer[5] = (float)(complexVectorPtr[5]); + inputFloatBuffer[6] = (float)(complexVectorPtr[6]); + inputFloatBuffer[7] = (float)(complexVectorPtr[7]); + + cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]); + cplxValue2 
= _mm_load_ps(&inputFloatBuffer[4]); + + re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88); /* mask 0x88 picks elements 0 and 2 of each operand */ + im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd); /* mask 0xdd picks elements 1 and 3 of each operand */ + + complexVectorPtr += 8; + + cplxValue1 = _mm_mul_ps(re, invScalar); + cplxValue2 = _mm_mul_ps(im, invScalar); + + cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values + cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values + + result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values + + result = _mm_sqrt_ps(result); // Square root the values + + _mm_store_ps(magnitudeVectorPtr, result); + + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; /* scalar tail: the at most three values left over after the SSE loop */ + magnitudeVectorPtr = &magnitudeVector[number]; + complexVectorPtr = (const int16_t*)&complexVector[number]; + for(; number < num_points; number++){ + float val1Real = (float)(*complexVectorPtr++) * iScalar; + float val1Imag = (float)(*complexVectorPtr++) * iScalar; + *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)); + } +} + + +#endif /* LV_HAVE_SSE */ + #if LV_HAVE_GENERIC /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector |