diff options
Diffstat (limited to 'volk/include/volk/volk_32f_s32f_power_32f_a16.h')
-rw-r--r-- | volk/include/volk/volk_32f_s32f_power_32f_a16.h | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a16.h b/volk/include/volk/volk_32f_s32f_power_32f_a16.h new file mode 100644 index 000000000..3ed594d9a --- /dev/null +++ b/volk/include/volk/volk_32f_s32f_power_32f_a16.h @@ -0,0 +1,144 @@ +#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H +#define INCLUDED_volk_32f_s32f_power_32f_a16_H + +#include <inttypes.h> +#include <stdio.h> +#include <math.h> + +#if LV_HAVE_SSE4_1 +#include <tmmintrin.h> + +#if LV_HAVE_LIB_SIMDMATH +#include <simdmath.h> +#endif /* LV_HAVE_LIB_SIMDMATH */ + +/*! + \brief Takes each the input vector value to the specified power and stores the results in the return vector + \param cVector The vector where the results will be stored + \param aVector The vector of values to be taken to a power + \param power The power value to be applied to each data point + \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector +*/ +static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + +#if LV_HAVE_LIB_SIMDMATH + __m128 vPower = _mm_set_ps1(power); + __m128 zeroValue = _mm_setzero_ps(); + __m128 signMask; + __m128 negatedValues; + __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); + __m128 onesMask = _mm_set_ps1(1); + + __m128 aVal, cVal; + for(;number < quarterPoints; number++){ + + aVal = _mm_load_ps(aPtr); + signMask = _mm_cmplt_ps(aVal, zeroValue); + negatedValues = _mm_sub_ps(zeroValue, aVal); + aVal = _mm_blendv_ps(aVal, negatedValues, signMask); + + // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after + cVal = powf4(aVal, vPower); // Takes each input value to the specified power + + cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal); + + _mm_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; +#endif /* LV_HAVE_LIB_SIMDMATH */ + + for(;number < num_points; number++){ + *cPtr++ = powf((*aPtr++), power); + } +} +#endif /* LV_HAVE_SSE4_1 */ + +#if LV_HAVE_SSE +#include <xmmintrin.h> + +#if LV_HAVE_LIB_SIMDMATH +#include <simdmath.h> +#endif /* LV_HAVE_LIB_SIMDMATH */ + +/*! + \brief Takes each the input vector value to the specified power and stores the results in the return vector + \param cVector The vector where the results will be stored + \param aVector The vector of values to be taken to a power + \param power The power value to be applied to each data point + \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector +*/ +static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + +#if LV_HAVE_LIB_SIMDMATH + __m128 vPower = _mm_set_ps1(power); + __m128 zeroValue = _mm_setzero_ps(); + __m128 signMask; + __m128 negatedValues; + __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); + __m128 onesMask = _mm_set_ps1(1); + + __m128 aVal, cVal; + for(;number < quarterPoints; number++){ + + aVal = _mm_load_ps(aPtr); + signMask = _mm_cmplt_ps(aVal, zeroValue); + negatedValues = _mm_sub_ps(zeroValue, aVal); + aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) ); + + // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after + cVal = powf4(aVal, vPower); // Takes each input value to the specified power + + cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal); + + _mm_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; +#endif /* LV_HAVE_LIB_SIMDMATH */ + + for(;number < num_points; number++){ + *cPtr++ = powf((*aPtr++), power); + } +} +#endif /* LV_HAVE_SSE */ + +#if LV_HAVE_GENERIC + /*! + \brief Takes each the input vector value to the specified power and stores the results in the return vector + \param cVector The vector where the results will be stored + \param aVector The vector of values to be taken to a power + \param power The power value to be applied to each data point + \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector + */ +static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ + float* cPtr = cVector; + const float* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = powf((*aPtr++), power); + } +} +#endif /* LV_HAVE_GENERIC */ + + + + +#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */ |