summaryrefslogtreecommitdiff
path: root/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h
diff options
context:
space:
mode:
Diffstat (limited to 'volk/include/volk/volk_32f_stddev_and_mean_aligned16.h')
-rw-r--r--volk/include/volk/volk_32f_stddev_and_mean_aligned16.h169
1 files changed, 169 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h b/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h
new file mode 100644
index 000000000..1cd502257
--- /dev/null
+++ b/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h
@@ -0,0 +1,169 @@
+#ifndef INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H
+#define INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#if LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*!
+ \brief Calculates the standard deviation and mean of the input buffer
+ \param stddev The calculated standard deviation
+ \param mean The mean of the input buffer
+ \param inputBuffer The buffer of points to calculate the std deviation for
+ \param num_points The number of values in input buffer to used in the stddev and mean calculations
+*/
+static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+ float returnValue = 0;
+ float newMean = 0;
+ if(num_points > 0){
+ unsigned int number = 0;
+ const unsigned int sixteenthPoints = num_points / 16;
+
+ const float* aPtr = inputBuffer;
+ float meanBuffer[4] __attribute__((aligned(128)));
+ float squareBuffer[4] __attribute__((aligned(128)));
+
+ __m128 accumulator = _mm_setzero_ps();
+ __m128 squareAccumulator = _mm_setzero_ps();
+ __m128 aVal1, aVal2, aVal3, aVal4;
+ __m128 cVal1, cVal2, cVal3, cVal4;
+ for(;number < sixteenthPoints; number++) {
+ aVal1 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
+ accumulator = _mm_add_ps(accumulator, aVal1); // accumulator += x
+
+ aVal2 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
+ accumulator = _mm_add_ps(accumulator, aVal2); // accumulator += x
+
+ aVal3 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
+ accumulator = _mm_add_ps(accumulator, aVal3); // accumulator += x
+
+ aVal4 = _mm_load_ps(aPtr); aPtr += 4;
+ cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
+ accumulator = _mm_add_ps(accumulator, aVal4); // accumulator += x
+
+ cVal1 = _mm_or_ps(cVal1, cVal2);
+ cVal3 = _mm_or_ps(cVal3, cVal4);
+ cVal1 = _mm_or_ps(cVal1, cVal3);
+
+ squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
+ }
+ _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
+ _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
+ newMean = meanBuffer[0];
+ newMean += meanBuffer[1];
+ newMean += meanBuffer[2];
+ newMean += meanBuffer[3];
+ returnValue = squareBuffer[0];
+ returnValue += squareBuffer[1];
+ returnValue += squareBuffer[2];
+ returnValue += squareBuffer[3];
+
+ number = sixteenthPoints * 16;
+ for(;number < num_points; number++){
+ returnValue += (*aPtr) * (*aPtr);
+ newMean += *aPtr++;
+ }
+ newMean /= num_points;
+ returnValue /= num_points;
+ returnValue -= (newMean * newMean);
+ returnValue = sqrt(returnValue);
+ }
+ *stddev = returnValue;
+ *mean = newMean;
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#if LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Calculates the standard deviation and mean of the input buffer
+ \param stddev The calculated standard deviation
+ \param mean The mean of the input buffer
+ \param inputBuffer The buffer of points to calculate the std deviation for
+ \param num_points The number of values in input buffer to used in the stddev and mean calculations
+*/
+static inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+ float returnValue = 0;
+ float newMean = 0;
+ if(num_points > 0){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* aPtr = inputBuffer;
+ float meanBuffer[4] __attribute__((aligned(128)));
+ float squareBuffer[4] __attribute__((aligned(128)));
+
+ __m128 accumulator = _mm_setzero_ps();
+ __m128 squareAccumulator = _mm_setzero_ps();
+ __m128 aVal = _mm_setzero_ps();
+ for(;number < quarterPoints; number++) {
+ aVal = _mm_load_ps(aPtr); // aVal = x
+ accumulator = _mm_add_ps(accumulator, aVal); // accumulator += x
+ aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
+ squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
+ aPtr += 4;
+ }
+ _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
+ _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
+ newMean = meanBuffer[0];
+ newMean += meanBuffer[1];
+ newMean += meanBuffer[2];
+ newMean += meanBuffer[3];
+ returnValue = squareBuffer[0];
+ returnValue += squareBuffer[1];
+ returnValue += squareBuffer[2];
+ returnValue += squareBuffer[3];
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ returnValue += (*aPtr) * (*aPtr);
+ newMean += *aPtr++;
+ }
+ newMean /= num_points;
+ returnValue /= num_points;
+ returnValue -= (newMean * newMean);
+ returnValue = sqrt(returnValue);
+ }
+ *stddev = returnValue;
+ *mean = newMean;
+}
+#endif /* LV_HAVE_SSE */
+
+#if LV_HAVE_GENERIC
+/*!
+ \brief Calculates the standard deviation and mean of the input buffer
+ \param stddev The calculated standard deviation
+ \param mean The mean of the input buffer
+ \param inputBuffer The buffer of points to calculate the std deviation for
+ \param num_points The number of values in input buffer to used in the stddev and mean calculations
+*/
+static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+ float returnValue = 0;
+ float newMean = 0;
+ if(num_points > 0){
+ const float* aPtr = inputBuffer;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ returnValue += (*aPtr) * (*aPtr);
+ newMean += *aPtr++;
+ }
+ newMean /= num_points;
+ returnValue /= num_points;
+ returnValue -= (newMean * newMean);
+ returnValue = sqrt(returnValue);
+ }
+ *stddev = returnValue;
+ *mean = newMean;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H */