summaryrefslogtreecommitdiff
path: root/volk/include/volk/volk_32f_x2_max_32f_a16.h
diff options
context:
space:
mode:
Diffstat (limited to 'volk/include/volk/volk_32f_x2_max_32f_a16.h')
-rw-r--r--volk/include/volk/volk_32f_x2_max_32f_a16.h85
1 files changed, 85 insertions, 0 deletions
diff --git a/volk/include/volk/volk_32f_x2_max_32f_a16.h b/volk/include/volk/volk_32f_x2_max_32f_a16.h
new file mode 100644
index 000000000..26e7f1246
--- /dev/null
+++ b/volk/include/volk/volk_32f_x2_max_32f_a16.h
@@ -0,0 +1,85 @@
+#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H
+#define INCLUDED_volk_32f_x2_max_32f_a16_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#if LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be checked
+ \param bVector The vector to be checked
+ \param num_points The number of values in aVector and bVector to be checked and stored into cVector
+*/
+static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+
+ __m128 aVal, bVal, cVal;
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_load_ps(aPtr);
+ bVal = _mm_load_ps(bPtr);
+
+ cVal = _mm_max_ps(aVal, bVal);
+
+ _mm_store_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ bPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ const float a = *aPtr++;
+ const float b = *bPtr++;
+ *cPtr++ = ( a > b ? a : b);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#if LV_HAVE_GENERIC
+/*!
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be checked
+ \param bVector The vector to be checked
+ \param num_points The number of values in aVector and bVector to be checked and stored into cVector
+*/
+static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ const float a = *aPtr++;
+ const float b = *bPtr++;
+ *cPtr++ = ( a > b ? a : b);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#if LV_HAVE_ORC
+/*!
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be checked
+ \param bVector The vector to be checked
+ \param num_points The number of values in aVector and bVector to be checked and stored into cVector
+*/
+extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+
+#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */