volk: conversion tweaks to build avx on MSVC

author: Josh Blum 2011-11-14 11:30:59 -0800
committer: Josh Blum 2011-11-30 21:33:57 -0500
commit: 52c51c983d51ff725238c22571b2d466875a5f22 (patch)
tree: f9cd62cbefdfed70182b88286c5e23cfbe170c84
parent: 6bf88fd950a9606d41d611ace8368f319a4b06e0 (diff)
download: gnuradio-52c51c983d51ff725238c22571b2d466875a5f22.tar.gz gnuradio-52c51c983d51ff725238c22571b2d466875a5f22.tar.bz2 gnuradio-52c51c983d51ff725238c22571b2d466875a5f22.zip
6 files changed, 15 insertions, 13 deletions
diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
index f60b33a41..a10a62350 100644
--- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
 #define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
 
+#include <volk/volk_common.h>
 
 #include<inttypes.h>
 #include<stdio.h>	
@@ -21,7 +22,7 @@ static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
 
   
   
-  volatile __m128i xmm0, xmm1, xmm2, xmm3, xmm4; 
+  __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
   __m128i  xmm5, xmm6, xmm7, xmm8;
   
   xmm4 = _mm_load_si128((__m128i*)shufmask0);
@@ -92,8 +93,7 @@ static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, in
     
     xmm0 = _mm_shuffle_epi8(xmm0, xmm3);
     
-
-    _mm_storel_pd((double*)p_target, (__m128d)xmm0);
+    _mm_storel_pd((double*)p_target, bit128_p(&xmm0)->double_vec);
     
     p_target = (__m128i*)((int8_t*)p_target + 8);
 
diff --git a/volk/include/volk/volk_32fc_index_max_16u_a.h b/volk/include/volk/volk_32fc_index_max_16u_a.h
index 9566aa32e..125a34582 100644
--- a/volk/include/volk/volk_32fc_index_max_16u_a.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a.h
@@ -87,8 +87,8 @@ static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_
 
     xmm2 = _mm_load_ps((float*)src0);
     
-    xmm1 = _mm_movelh_ps((__m128)xmm8, (__m128)xmm8);
-    xmm8 = (__m128i)xmm1;
+    xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, bit128_p(&xmm8)->float_vec);
+    xmm8 = bit128_p(&xmm1)->int_vec;
 
     xmm2 = _mm_mul_ps(xmm2, xmm2);
 
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
index f11c93682..02faf86c2 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -96,9 +96,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
 
     in1 = _mm_loadu_ps( (float*) (input+offset) );
     in2 = _mm_loadu_ps( (float*) (taps+offset) );
-    Rv = in1*in2;
+    Rv = _mm_mul_ps(in1, in2);
     fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
-    Iv = in1*fehg;
+    Iv = _mm_mul_ps(in1, fehg);
     Rs = _mm_hadd_ps( _mm_hadd_ps(Rv, zv) ,zv);
     Ivm = _mm_xor_ps( negMask.vec, Iv );
     Is = _mm_hadd_ps( _mm_hadd_ps(Ivm, zv) ,zv);
@@ -119,9 +119,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result
 
     in1 = _mm_loadu_ps( (float*) (input+offset) );
     in2 = _mm_loadu_ps( (float*) (taps+offset) );
-    Rv = _mm_and_ps(in1*in2, halfMask.vec);
+    Rv = _mm_and_ps(_mm_mul_ps(in1, in2), halfMask.vec);
     fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
-    Iv = _mm_and_ps(in1*fehg, halfMask.vec);
+    Iv = _mm_and_ps(_mm_mul_ps(in1, fehg), halfMask.vec);
     Rs = _mm_hadd_ps(_mm_hadd_ps(Rv, zv),zv);
     Ivm = _mm_xor_ps( negMask.vec, Iv );
     Is = _mm_hadd_ps(_mm_hadd_ps(Ivm, zv),zv);
diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
index 0bb76f1d1..0c280eb6e 100644
--- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
+++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
@@ -26,8 +26,8 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVect
     
   for(;number < quarterPoints; number++){
     // Convert into 8 bit values into 16 bit values
-    x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
-    y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
+    x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
+    y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
       
     // Calculate the ar*cr - ai*(-ci) portions
     realz = _mm_madd_epi16(x,y);
diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
index 3e05608a4..a2c2b04f6 100644
--- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
+++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
@@ -29,8 +29,8 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t*
 
   for(;number < quarterPoints; number++){
     // Convert into 8 bit values into 16 bit values
-    x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a));
-    y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b));
+    x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
+    y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
 
     // Calculate the ar*cr - ai*(-ci) portions
     realz = _mm_madd_epi16(x,y);
diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h
index 2c935d1fb..38263d5f7 100644
--- a/volk/include/volk/volk_common.h
+++ b/volk/include/volk/volk_common.h
@@ -91,4 +91,6 @@ union bit128{
   #endif
 };
 
+#define bit128_p(x) ((union bit128 *)(x))
+
 #endif /*INCLUDED_LIBVOLK_COMMON_H*/
author	Josh Blum	2011-11-14 11:30:59 -0800
committer	Josh Blum	2011-11-30 21:33:57 -0500
commit	52c51c983d51ff725238c22571b2d466875a5f22 (patch)
tree	f9cd62cbefdfed70182b88286c5e23cfbe170c84
parent	6bf88fd950a9606d41d611ace8368f319a4b06e0 (diff)
download	gnuradio-52c51c983d51ff725238c22571b2d466875a5f22.tar.gz gnuradio-52c51c983d51ff725238c22571b2d466875a5f22.tar.bz2 gnuradio-52c51c983d51ff725238c22571b2d466875a5f22.zip