diff options
Diffstat (limited to 'volk/include/volk/volk_32f_interleave_16sc_aligned16.h')
-rw-r--r-- | volk/include/volk/volk_32f_interleave_16sc_aligned16.h | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h b/volk/include/volk/volk_32f_interleave_16sc_aligned16.h new file mode 100644 index 000000000..476946b88 --- /dev/null +++ b/volk/include/volk/volk_32f_interleave_16sc_aligned16.h @@ -0,0 +1,155 @@ +#ifndef INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H +#define INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H + +#include <inttypes.h> +#include <stdio.h> + +#if LV_HAVE_SSE2 +#include <emmintrin.h> + /*! + \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. + \param iBuffer The I buffer data to be interleaved + \param qBuffer The Q buffer data to be interleaved + \param complexVector The complex output vector + \param scalar The scaling value being multiplied against each data point + \param num_points The number of complex data values to be interleaved + */ +static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const float* iBufferPtr = iBuffer; + const float* qBufferPtr = qBuffer; + + __m128 vScalar = _mm_set_ps1(scalar); + + const unsigned int quarterPoints = num_points / 4; + + __m128 iValue, qValue, cplxValue1, cplxValue2; + __m128i intValue1, intValue2; + + int16_t* complexVectorPtr = (int16_t*)complexVector; + + for(;number < quarterPoints; number++){ + iValue = _mm_load_ps(iBufferPtr); + qValue = _mm_load_ps(qBufferPtr); + + // Interleaves the lower two values in the i and q variables into one buffer + cplxValue1 = _mm_unpacklo_ps(iValue, qValue); + cplxValue1 = _mm_mul_ps(cplxValue1, vScalar); + + // Interleaves the upper two values in the i and q variables into one buffer + cplxValue2 = _mm_unpackhi_ps(iValue, qValue); + cplxValue2 = _mm_mul_ps(cplxValue2, vScalar); + + intValue1 = _mm_cvtps_epi32(cplxValue1); + intValue2 = _mm_cvtps_epi32(cplxValue2); + + intValue1 = _mm_packs_epi32(intValue1, intValue2); + + _mm_store_si128((__m128i*)complexVectorPtr, intValue1); + complexVectorPtr += 8; + + iBufferPtr += 4; + qBufferPtr += 4; + } + + number = quarterPoints * 4; + complexVectorPtr = (int16_t*)(&complexVector[number]); + for(; number < num_points; number++){ + *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar); + *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar); + } + +} +#endif /* LV_HAVE_SSE2 */ + +#if LV_HAVE_SSE +#include <xmmintrin.h> + /*! + \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. + \param iBuffer The I buffer data to be interleaved + \param qBuffer The Q buffer data to be interleaved + \param complexVector The complex output vector + \param scalar The scaling value being multiplied against each data point + \param num_points The number of complex data values to be interleaved + */ +static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const float* iBufferPtr = iBuffer; + const float* qBufferPtr = qBuffer; + + __m128 vScalar = _mm_set_ps1(scalar); + + const unsigned int quarterPoints = num_points / 4; + + __m128 iValue, qValue, cplxValue; + + int16_t* complexVectorPtr = (int16_t*)complexVector; + + float floatBuffer[4] __attribute__((aligned(128))); + + for(;number < quarterPoints; number++){ + iValue = _mm_load_ps(iBufferPtr); + qValue = _mm_load_ps(qBufferPtr); + + // Interleaves the lower two values in the i and q variables into one buffer + cplxValue = _mm_unpacklo_ps(iValue, qValue); + cplxValue = _mm_mul_ps(cplxValue, vScalar); + + _mm_store_ps(floatBuffer, cplxValue); + + *complexVectorPtr++ = (int16_t)(floatBuffer[0]); + *complexVectorPtr++ = (int16_t)(floatBuffer[1]); + *complexVectorPtr++ = (int16_t)(floatBuffer[2]); + *complexVectorPtr++ = (int16_t)(floatBuffer[3]); + + // Interleaves the upper two values in the i and q variables into one buffer + cplxValue = _mm_unpackhi_ps(iValue, qValue); + cplxValue = _mm_mul_ps(cplxValue, vScalar); + + _mm_store_ps(floatBuffer, cplxValue); + + *complexVectorPtr++ = (int16_t)(floatBuffer[0]); + *complexVectorPtr++ = (int16_t)(floatBuffer[1]); + *complexVectorPtr++ = (int16_t)(floatBuffer[2]); + *complexVectorPtr++ = (int16_t)(floatBuffer[3]); + + iBufferPtr += 4; + qBufferPtr += 4; + } + + number = quarterPoints * 4; + complexVectorPtr = (int16_t*)(&complexVector[number]); + for(; number < num_points; number++){ + *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar); + *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar); + } + +} +#endif /* LV_HAVE_SSE */ + +#if LV_HAVE_GENERIC + /*! + \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. + \param iBuffer The I buffer data to be interleaved + \param qBuffer The Q buffer data to be interleaved + \param complexVector The complex output vector + \param scalar The scaling value being multiplied against each data point + \param num_points The number of complex data values to be interleaved + */ +static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ + int16_t* complexVectorPtr = (int16_t*)complexVector; + const float* iBufferPtr = iBuffer; + const float* qBufferPtr = qBuffer; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar); + *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar); + } +} +#endif /* LV_HAVE_GENERIC */ + + + + +#endif /* INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H */ |