diff options
Diffstat (limited to 'volk/include')
6 files changed, 15 insertions, 13 deletions
diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h index f60b33a41..a10a62350 100644 --- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h +++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H #define INCLUDED_volk_16i_max_star_horizontal_16i_a_H +#include <volk/volk_common.h> #include<inttypes.h> #include<stdio.h> @@ -21,7 +22,7 @@ static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in - volatile __m128i xmm0, xmm1, xmm2, xmm3, xmm4; + __m128i xmm0, xmm1, xmm2, xmm3, xmm4; __m128i xmm5, xmm6, xmm7, xmm8; xmm4 = _mm_load_si128((__m128i*)shufmask0); @@ -92,8 +93,7 @@ static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in xmm0 = _mm_shuffle_epi8(xmm0, xmm3); - - _mm_storel_pd((double*)p_target, (__m128d)xmm0); + _mm_storel_pd((double*)p_target, bit128_p(&xmm0)->double_vec); p_target = (__m128i*)((int8_t*)p_target + 8); diff --git a/volk/include/volk/volk_32fc_index_max_16u_a.h b/volk/include/volk/volk_32fc_index_max_16u_a.h index 9566aa32e..125a34582 100644 --- a/volk/include/volk/volk_32fc_index_max_16u_a.h +++ b/volk/include/volk/volk_32fc_index_max_16u_a.h @@ -87,8 +87,8 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_ xmm2 = _mm_load_ps((float*)src0); - xmm1 = _mm_movelh_ps((__m128)xmm8, (__m128)xmm8); - xmm8 = (__m128i)xmm1; + xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec); + xmm8 = bit128_p(&xmm1)->int_vec; xmm2 = _mm_mul_ps(xmm2, xmm2); diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h index f11c93682..02faf86c2 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -96,9 +96,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result in1 = _mm_loadu_ps( (float*) (input+offset) ); in2 = _mm_loadu_ps( (float*) (taps+offset) ); - Rv = in1*in2; + Rv = _mm_mul_ps(in1, in2); fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1)); - Iv = in1*fehg; + Iv = _mm_mul_ps(in1, fehg); Rs = _mm_hadd_ps( _mm_hadd_ps(Rv, zv) ,zv); Ivm = _mm_xor_ps( negMask.vec, Iv ); Is = _mm_hadd_ps( _mm_hadd_ps(Ivm, zv) ,zv); @@ -119,9 +119,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result in1 = _mm_loadu_ps( (float*) (input+offset) ); in2 = _mm_loadu_ps( (float*) (taps+offset) ); - Rv = _mm_and_ps(in1*in2, halfMask.vec); + Rv = _mm_and_ps(_mm_mul_ps(in1, in2), halfMask.vec); fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1)); - Iv = _mm_and_ps(in1*fehg, halfMask.vec); + Iv = _mm_and_ps(_mm_mul_ps(in1, fehg), halfMask.vec); Rs = _mm_hadd_ps(_mm_hadd_ps(Rv, zv),zv); Ivm = _mm_xor_ps( negMask.vec, Iv ); Is = _mm_hadd_ps(_mm_hadd_ps(Ivm, zv),zv); diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h index 0bb76f1d1..0c280eb6e 100644 --- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h +++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h @@ -26,8 +26,8 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVect for(;number < quarterPoints; number++){ // Convert into 8 bit values into 16 bit values - x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a)); - y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b)); + x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a)); + y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b)); // Calculate the ar*cr - ai*(-ci) portions realz = _mm_madd_epi16(x,y); diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h index 3e05608a4..a2c2b04f6 100644 --- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h +++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h @@ -29,8 +29,8 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* for(;number < quarterPoints; number++){ // Convert into 8 bit values into 16 bit values - x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a)); - y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b)); + x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a)); + y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b)); // Calculate the ar*cr - ai*(-ci) portions realz = _mm_madd_epi16(x,y); diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h index 2c935d1fb..38263d5f7 100644 --- a/volk/include/volk/volk_common.h +++ b/volk/include/volk/volk_common.h @@ -91,4 +91,6 @@ union bit128{ #endif }; +#define bit128_p(x) ((union bit128 *)(x)) + #endif /*INCLUDED_LIBVOLK_COMMON_H*/ |