path: root/volk/include/volk/volk_32f_s32f_power_32f_a16.h
diff options
Diffstat (limited to 'volk/include/volk/volk_32f_s32f_power_32f_a16.h')
1 files changed, 144 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a16.h b/volk/include/volk/volk_32f_s32f_power_32f_a16.h
new file mode 100644
index 000000000..3ed594d9a
--- /dev/null
+++ b/volk/include/volk/volk_32f_s32f_power_32f_a16.h
@@ -0,0 +1,144 @@
+#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H
+#define INCLUDED_volk_32f_s32f_power_32f_a16_H
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#if LV_HAVE_SSE4_1
+#include <tmmintrin.h>
+#include <simdmath.h>
+#endif /* LV_HAVE_LIB_SIMDMATH */
+/*!
+ \brief Raises each value of the input vector to the specified power and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector of values to be raised to the given power
+ \param power The exponent applied to each data point
+ \param num_points The number of values in aVector to be raised to the specified power and stored into cVector
+*/
+static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ // Writes aVector[i] raised to 'power' into cVector[i] for num_points floats; four at a time via powf4, remainder via scalar powf
+ unsigned int number = 0; // Counts groups of four inside the vector loop, then becomes an element index for the scalar tail
+ const unsigned int quarterPoints = num_points / 4; // Number of complete groups of four floats
+ float* cPtr = cVector; // Write cursor into the output
+ const float* aPtr = aVector; // Read cursor into the input
+ __m128 vPower = _mm_set_ps1(power); // Exponent broadcast to all four lanes; NOTE(review): the lines from here to the "#endif LV_HAVE_LIB_SIMDMATH" below look like the simdmath-only path, but the opening #ifdef is not visible in this excerpt
+ __m128 zeroValue = _mm_setzero_ps(); // 0.0f in every lane, used for the sign test and for negation
+ __m128 signMask; // Per-lane mask: all bits set where the input is negative
+ __m128 negatedValues; // 0 - input, i.e. the input with its sign flipped
+ __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); // (-1)^power computed once; it is the factor re-applied to lanes whose input was negative (NaN when power is not an integer, as with scalar powf on a negative base)
+ __m128 onesMask = _mm_set_ps1(1); // 1.0f in every lane; the neutral factor for non-negative inputs
+ __m128 aVal, cVal; // Current input vector and result vector
+ for(;number < quarterPoints; number++){ // One iteration per group of four floats
+ aVal = _mm_load_ps(aPtr); // Aligned load: aPtr must be 16-byte aligned
+ signMask = _mm_cmplt_ps(aVal, zeroValue); // Lanes with aVal < 0 become all ones, others all zeros
+ negatedValues = _mm_sub_ps(zeroValue, aVal); // -aVal for every lane
+ aVal = _mm_blendv_ps(aVal, negatedValues, signMask); // Take -aVal where the input was negative, so aVal now holds absolute values; NOTE(review): _mm_blendv_ps is an SSE4.1 intrinsic normally declared in <smmintrin.h>, while only <tmmintrin.h> is visibly included here - confirm
+ // powf4 doesn't support negative values in the base, so the base is replaced by its absolute value above and the sign factor (-1)^power is multiplied back in afterwards
+ cVal = powf4(aVal, vPower); // Raises each (now non-negative) lane to the given power; powf4 comes from <simdmath.h>
+ cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal); // Multiply by (-1)^power in lanes that were negative and by 1 elsewhere
+ _mm_store_ps(cPtr,cVal); // Aligned store of the four results into the output; cPtr must be 16-byte aligned
+ aPtr += 4; // Advance both cursors by one group of four floats
+ cPtr += 4;
+ }
+ number = quarterPoints * 4; // Index of the first element not handled by the vector loop
+#endif /* LV_HAVE_LIB_SIMDMATH */
+ for(;number < num_points; number++){ // Scalar path: handles the remaining num_points % 4 elements after the vector loop
+ *cPtr++ = powf((*aPtr++), power); // Standard-library powf on one element, then advance both cursors
+ }
+#endif /* LV_HAVE_SSE4_1 */
+#include <xmmintrin.h>
+#include <simdmath.h>
+#endif /* LV_HAVE_LIB_SIMDMATH */
+/*!
+ \brief Raises each value of the input vector to the specified power and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector of values to be raised to the given power
+ \param power The exponent applied to each data point
+ \param num_points The number of values in aVector to be raised to the specified power and stored into cVector
+*/
+static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ // Writes aVector[i] raised to 'power' into cVector[i] for num_points floats; four at a time via powf4 using plain SSE bit operations, remainder via scalar powf
+ unsigned int number = 0; // Counts groups of four inside the vector loop, then becomes an element index for the scalar tail
+ const unsigned int quarterPoints = num_points / 4; // Number of complete groups of four floats
+ float* cPtr = cVector; // Write cursor into the output
+ const float* aPtr = aVector; // Read cursor into the input
+ __m128 vPower = _mm_set_ps1(power); // Exponent broadcast to all four lanes; NOTE(review): the lines from here to the "#endif LV_HAVE_LIB_SIMDMATH" below look like the simdmath-only path, but the opening #ifdef is not visible in this excerpt
+ __m128 zeroValue = _mm_setzero_ps(); // 0.0f in every lane, used for the sign test and for negation
+ __m128 signMask; // Per-lane mask: all bits set where the input is negative
+ __m128 negatedValues; // 0 - input, i.e. the input with its sign flipped
+ __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); // (-1)^power computed once; it is the factor re-applied to lanes whose input was negative (NaN when power is not an integer, as with scalar powf on a negative base)
+ __m128 onesMask = _mm_set_ps1(1); // 1.0f in every lane; the neutral factor for non-negative inputs
+ __m128 aVal, cVal; // Current input vector and result vector
+ for(;number < quarterPoints; number++){ // One iteration per group of four floats
+ aVal = _mm_load_ps(aPtr); // Aligned load: aPtr must be 16-byte aligned
+ signMask = _mm_cmplt_ps(aVal, zeroValue); // Lanes with aVal < 0 become all ones, others all zeros
+ negatedValues = _mm_sub_ps(zeroValue, aVal); // -aVal for every lane
+ aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) ); // Bitwise select: keep aVal where the mask is clear, take -aVal where it is set, so aVal now holds absolute values
+ // powf4 doesn't support negative values in the base, so the base is replaced by its absolute value above and the sign factor (-1)^power is multiplied back in afterwards
+ cVal = powf4(aVal, vPower); // Raises each (now non-negative) lane to the given power; powf4 comes from <simdmath.h>
+ cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal); // Same bitwise select for the factor: (-1)^power in lanes that were negative, 1 elsewhere, then multiply
+ _mm_store_ps(cPtr,cVal); // Aligned store of the four results into the output; cPtr must be 16-byte aligned
+ aPtr += 4; // Advance both cursors by one group of four floats
+ cPtr += 4;
+ }
+ number = quarterPoints * 4; // Index of the first element not handled by the vector loop
+#endif /* LV_HAVE_LIB_SIMDMATH */
+ for(;number < num_points; number++){ // Scalar path: handles the remaining num_points % 4 elements after the vector loop
+ *cPtr++ = powf((*aPtr++), power); // Standard-library powf on one element, then advance both cursors
+ }
+#endif /* LV_HAVE_SSE */
+ /*!
+ \brief Raises each value of the input vector to the specified power and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector of values to be raised to the given power
+ \param power The exponent applied to each data point
+ \param num_points The number of values in aVector to be raised to the specified power and stored into cVector
+ */
+static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ // Portable scalar version: writes powf(aVector[i], power) into cVector[i] for each of num_points elements
+ float* cPtr = cVector; // Write cursor into the output
+ const float* aPtr = aVector; // Read cursor into the input
+ unsigned int number = 0; // Element index
+ for(number = 0; number < num_points; number++){ // Visit every element once, in order
+ *cPtr++ = powf((*aPtr++), power); // Raise one input value to 'power', store it, and advance both cursors
+ }
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */