summaryrefslogtreecommitdiff
path: root/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
diff options
context:
space:
mode:
Diffstat (limited to 'volk/include/volk/volk_32f_s32f_stddev_32f_a16.h')
-rw-r--r--volk/include/volk/volk_32f_s32f_stddev_32f_a16.h144
1 files changed, 144 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
new file mode 100644
index 000000000..32f4fa067
--- /dev/null
+++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
@@ -0,0 +1,144 @@
+#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H
+#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#if LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*!
+ \brief Calculates the standard deviation of the input buffer using the supplied mean
+ \param stddev The calculated standard deviation
+ \param inputBuffer The buffer of points to calculate the std deviation for
+ \param mean The mean of the input buffer
+ \param num_points The number of values in input buffer to used in the stddev calculation
+*/
+static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+ float returnValue = 0;
+ if(num_points > 0){
+ unsigned int number = 0;
+ const unsigned int sixteenthPoints = num_points / 16;
+
+ const float* aPtr = inputBuffer;
+
+ float squareBuffer[4] __attribute__((aligned(128)));
+
+ __m128 squareAccumulator = _mm_setzero_ps();
+ __m128 aVal1, aVal2, aVal3, aVal4;
+ __m128 cVal1, cVal2, cVal3, cVal4;
+ for(;number < sixteenthPoints; number++) {
+ aVal1 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
+
+ aVal2 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
+
+ aVal3 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
+
+ aVal4 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
+
+ cVal1 = _mm_or_ps(cVal1, cVal2);
+ cVal3 = _mm_or_ps(cVal3, cVal4);
+ cVal1 = _mm_or_ps(cVal1, cVal3);
+
+ squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
+ }
+ _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
+ returnValue = squareBuffer[0];
+ returnValue += squareBuffer[1];
+ returnValue += squareBuffer[2];
+ returnValue += squareBuffer[3];
+
+ number = sixteenthPoints * 16;
+ for(;number < num_points; number++){
+ returnValue += (*aPtr) * (*aPtr);
+ aPtr++;
+ }
+ returnValue /= num_points;
+ returnValue -= (mean * mean);
+ returnValue = sqrt(returnValue);
+ }
+ *stddev = returnValue;
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#if LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Calculates the standard deviation of the input buffer using the supplied mean
+ \param stddev The calculated standard deviation
+ \param inputBuffer The buffer of points to calculate the std deviation for
+ \param mean The mean of the input buffer
+ \param num_points The number of values in input buffer to used in the stddev calculation
+*/
+static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+ float returnValue = 0;
+ if(num_points > 0){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* aPtr = inputBuffer;
+
+ float squareBuffer[4] __attribute__((aligned(128)));
+
+ __m128 squareAccumulator = _mm_setzero_ps();
+ __m128 aVal = _mm_setzero_ps();
+ for(;number < quarterPoints; number++) {
+ aVal = _mm_load_ps(aPtr); // aVal = x
+ aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
+ squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
+ aPtr += 4;
+ }
+ _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
+ returnValue = squareBuffer[0];
+ returnValue += squareBuffer[1];
+ returnValue += squareBuffer[2];
+ returnValue += squareBuffer[3];
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ returnValue += (*aPtr) * (*aPtr);
+ aPtr++;
+ }
+ returnValue /= num_points;
+ returnValue -= (mean * mean);
+ returnValue = sqrt(returnValue);
+ }
+ *stddev = returnValue;
+}
+#endif /* LV_HAVE_SSE */
+
+#if LV_HAVE_GENERIC
+/*!
+ \brief Calculates the standard deviation of the input buffer using the supplied mean
+ \param stddev The calculated standard deviation
+ \param inputBuffer The buffer of points to calculate the std deviation for
+ \param mean The mean of the input buffer
+ \param num_points The number of values in input buffer to used in the stddev calculation
+*/
+static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+ float returnValue = 0;
+ if(num_points > 0){
+ const float* aPtr = inputBuffer;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ returnValue += (*aPtr) * (*aPtr);
+ aPtr++;
+ }
+
+ returnValue /= num_points;
+ returnValue -= (mean * mean);
+ returnValue = sqrt(returnValue);
+ }
+ *stddev = returnValue;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */