summary | refs | log | tree | commit | diff
path: root/volk/include
diff options
context:
space:
mode:
Diffstat (limited to 'volk/include')
-rw-r--r-- volk/include/volk/volk_16i_max_star_horizontal_16i_a.h | 6
-rw-r--r-- volk/include/volk/volk_32fc_index_max_16u_a.h | 4
-rw-r--r-- volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h | 8
-rw-r--r-- volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h | 4
-rw-r--r-- volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h | 4
-rw-r--r-- volk/include/volk/volk_common.h | 2
6 files changed, 15 insertions, 13 deletions
diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
index f60b33a41..a10a62350 100644
--- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
@@ -1,6 +1,7 @@
#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
#define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
+#include <volk/volk_common.h>
#include<inttypes.h>
#include<stdio.h>
@@ -21,7 +22,7 @@ static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
- volatile __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
__m128i xmm5, xmm6, xmm7, xmm8;
xmm4 = _mm_load_si128((__m128i*)shufmask0);
@@ -92,8 +93,7 @@ static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
xmm0 = _mm_shuffle_epi8(xmm0, xmm3);
-
- _mm_storel_pd((double*)p_target, (__m128d)xmm0);
+ _mm_storel_pd((double*)p_target, bit128_p(&xmm0)->double_vec);
p_target = (__m128i*)((int8_t*)p_target + 8);
diff --git a/volk/include/volk/volk_32fc_index_max_16u_a.h b/volk/include/volk/volk_32fc_index_max_16u_a.h
index 9566aa32e..125a34582 100644
--- a/volk/include/volk/volk_32fc_index_max_16u_a.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a.h
@@ -87,8 +87,8 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
xmm2 = _mm_load_ps((float*)src0);
- xmm1 = _mm_movelh_ps((__m128)xmm8, (__m128)xmm8);
- xmm8 = (__m128i)xmm1;
+ xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
+ xmm8 = bit128_p(&xmm1)->int_vec;
xmm2 = _mm_mul_ps(xmm2, xmm2);
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
index f11c93682..02faf86c2 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -96,9 +96,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
in1 = _mm_loadu_ps( (float*) (input+offset) );
in2 = _mm_loadu_ps( (float*) (taps+offset) );
- Rv = in1*in2;
+ Rv = _mm_mul_ps(in1, in2);
fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
- Iv = in1*fehg;
+ Iv = _mm_mul_ps(in1, fehg);
Rs = _mm_hadd_ps( _mm_hadd_ps(Rv, zv) ,zv);
Ivm = _mm_xor_ps( negMask.vec, Iv );
Is = _mm_hadd_ps( _mm_hadd_ps(Ivm, zv) ,zv);
@@ -119,9 +119,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
in1 = _mm_loadu_ps( (float*) (input+offset) );
in2 = _mm_loadu_ps( (float*) (taps+offset) );
- Rv = _mm_and_ps(in1*in2, halfMask.vec);
+ Rv = _mm_and_ps(_mm_mul_ps(in1, in2), halfMask.vec);
fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
- Iv = _mm_and_ps(in1*fehg, halfMask.vec);
+ Iv = _mm_and_ps(_mm_mul_ps(in1, fehg), halfMask.vec);
Rs = _mm_hadd_ps(_mm_hadd_ps(Rv, zv),zv);
Ivm = _mm_xor_ps( negMask.vec, Iv );
Is = _mm_hadd_ps(_mm_hadd_ps(Ivm, zv),zv);
diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
index 0bb76f1d1..0c280eb6e 100644
--- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
+++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
@@ -26,8 +26,8 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVect
for(;number < quarterPoints; number++){
// Convert into 8 bit values into 16 bit values
- x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
- y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
+ x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
+ y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
// Calculate the ar*cr - ai*(-ci) portions
realz = _mm_madd_epi16(x,y);
diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
index 3e05608a4..a2c2b04f6 100644
--- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
@@ -29,8 +29,8 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t*
for(;number < quarterPoints; number++){
// Convert into 8 bit values into 16 bit values
- x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
- y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
+ x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
+ y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
// Calculate the ar*cr - ai*(-ci) portions
realz = _mm_madd_epi16(x,y);
diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h
index 2c935d1fb..38263d5f7 100644
--- a/volk/include/volk/volk_common.h
+++ b/volk/include/volk/volk_common.h
@@ -91,4 +91,6 @@ union bit128{
#endif
};
+#define bit128_p(x) ((union bit128 *)(x))
+
#endif /*INCLUDED_LIBVOLK_COMMON_H*/