diff options
Diffstat (limited to 'volk')
33 files changed, 113 insertions, 93 deletions
diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a16.h b/volk/include/volk/volk_16ic_magnitude_16i_a16.h index e75d54ec4..00d29b112 100644 --- a/volk/include/volk/volk_16ic_magnitude_16i_a16.h +++ b/volk/include/volk/volk_16ic_magnitude_16i_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H #define INCLUDED_volk_16ic_magnitude_16i_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> #include <math.h> @@ -25,8 +26,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -96,8 +97,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float inputFloatBuffer[4] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h index dcb2499fa..a4f0689e5 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl __m128 invScalar = _mm_set_ps1(1.0/scalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h index f21fe77f8..564aa1f5d 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -72,7 +73,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, __m128 invScalar = _mm_set_ps1(iScalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h index 388d2ebcd..637ba9fd0 100644 --- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H #define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> #include <math.h> @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ @@ -91,7 +92,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result, re, im; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ inputFloatBuffer[0] = (float)(complexVectorPtr[0]); diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a16.h b/volk/include/volk/volk_32f_accumulator_s32f_a16.h index 6a85e066e..94aff3a49 100644 --- a/volk/include/volk/volk_32f_accumulator_s32f_a16.h +++ b/volk/include/volk/volk_32f_accumulator_s32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H #define INCLUDED_volk_32f_accumulator_s32f_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -18,7 +19,7 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float tempBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float tempBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); diff --git a/volk/include/volk/volk_32f_index_max_16u_a16.h b/volk/include/volk/volk_32f_index_max_16u_a16.h index 3934d2db7..5c19bfca0 100644 --- a/volk/include/volk/volk_32f_index_max_16u_a16.h +++ b/volk/include/volk/volk_32f_index_max_16u_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_index_max_16u_a16_H #define INCLUDED_volk_32f_index_max_16u_a16_H +#include <volk/volk_attributes.h> #include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> @@ -25,8 +26,8 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -83,8 +84,8 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h index 55d4e0319..70ab3ccdb 100644 --- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H #define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -21,7 +22,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no const unsigned int quarterPoints = num_points / 4; const float* dataPointsPtr = realDataPoints; - float avgPointsVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4]; __m128 dataPointsVal; __m128 avgPointsVal = _mm_setzero_ps(); @@ -87,7 +88,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no sumMean += avgPointsVector[3]; // Calculate the number of valid bins from the remaning count - float validBinCountVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4]; _mm_store_ps(validBinCountVector, vValidBinCount); float validBinCount = 0; diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h index 9d1d0ef4d..71b53ba3a 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H #define INCLUDED_volk_32f_s32f_convert_16i_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -63,7 +64,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index 06228ef7d..dec3f1611 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -65,7 +65,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h index 82c74bf44..095d7bd35 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H #define INCLUDED_volk_32f_s32f_convert_32i_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -59,7 +60,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index 253a48ae3..b4e954dc4 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -61,7 +61,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h index 8dab0cdf4..509a46609 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H #define INCLUDED_volk_32f_s32f_convert_8i_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -70,7 +71,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index 72b193c9d..1c6bf87c9 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -72,7 +72,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h index 48d2fe1fe..779ae2d39 100644 --- a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H #define INCLUDED_volk_32f_s32f_stddev_32f_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> #include <math.h> @@ -22,7 +23,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal1, aVal2, aVal3, aVal4; @@ -82,7 +83,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h index f1cb2ae0e..9605322d3 100644 --- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h +++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> #include <math.h> @@ -22,8 +23,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo const unsigned int sixteenthPoints = num_points / 16; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -95,8 +96,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h index d13f12e51..93151260f 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H #define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H +#include <volk/volk_attributes.h> #include<stdio.h> @@ -53,7 +54,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -102,7 +103,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -163,7 +164,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h index 7c1136a67..7f47122ff 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h @@ -53,7 +53,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -102,7 +102,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -163,7 +163,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h index e98735245..cab3db50d 100644 --- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h +++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -85,7 +86,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ iValue = _mm_load_ps(iBufferPtr); diff --git a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h index 1e3e61e08..304515a5c 100644 --- a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -24,7 +25,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer __m128 cplxValue1, cplxValue2, iValue; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); diff --git a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h index 14318ab01..96afa5ae9 100644 --- a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H #define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> #include <math.h> @@ -25,7 +26,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto __m128 cplxValue1, cplxValue2, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -80,7 +81,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h index d78faf5b5..78e28c903 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H #define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H +#include <volk/volk_attributes.h> #include<volk/volk_complex.h> #include<stdio.h> @@ -64,7 +65,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* r static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -205,7 +206,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* resul #if LV_HAVE_SSE && LV_HAVE_32 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; int bound = num_bytes >> 4; int leftovers = num_bytes % 16; diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h index 69781f0fb..73576a766 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -66,7 +66,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; union HalfMask { uint32_t intRep[4]; diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h index b7b9768ab..d404ee684 100644 --- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H #define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H +#include <volk/volk_attributes.h> #include <volk/volk_complex.h> #include <stdio.h> #include <string.h> @@ -358,7 +359,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const b += 2; } - lv_32fc_t dotProductVector[2] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2]; _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h index 80032d2fe..d68d2462a 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -95,7 +96,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo __m128 invScalar = _mm_set_ps1(1.0/scalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(complexVectorPtr[0]); diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h index 47a968ac1..d2cfa42f6 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h @@ -1,6 +1,7 @@ #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H +#include <volk/volk_attributes.h> #include <inttypes.h> #include <stdio.h> @@ -81,7 +82,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c __m128 invScalar = _mm_set_ps1(iScalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index 154aa0f17..5d5eb7e18 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() { double total; const int vlen = 3200; const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); - short input4[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + __VOLK_ATTR_ALIGNED(16) short input4[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); - short output2[vlen] __attribute__ ((aligned (16))); - short output3[vlen] __attribute__ ((aligned (16))); - short output01[vlen] __attribute__ ((aligned (16))); - short output11[vlen] __attribute__ ((aligned (16))); - short output21[vlen] __attribute__ ((aligned (16))); - short output31[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + __VOLK_ATTR_ALIGNED(16) short output2[vlen]; + __VOLK_ATTR_ALIGNED(16) short output3[vlen]; + __VOLK_ATTR_ALIGNED(16) short output01[vlen]; + __VOLK_ATTR_ALIGNED(16) short output11[vlen]; + __VOLK_ATTR_ALIGNED(16) short output21[vlen]; + __VOLK_ATTR_ALIGNED(16) short output31[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index 62deffaeb..2e6e6a1a0 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short target3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short target3[vlen]; - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; - short cntl0[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl1[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl2[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; - short cntl3[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 819b2256b..3cd4e906d 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))); - short cntl0[vlen] __attribute__ ((aligned (16))); - short cntl1[vlen] __attribute__ ((aligned (16))); - short cntl2[vlen] __attribute__ ((aligned (16))); - short cntl3[vlen] __attribute__ ((aligned (16))); - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; for(int i = 0; i < vlen; ++i) { src0[i] = i; diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 66f8c9afa..192a69e35 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1() { const int vlen = 34; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = (short) (rand() - (RAND_MAX/2)); diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index 592304f83..a2e7a85be 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float input0[vlen]; - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; + __VOLK_ATTR_ALIGNED(16) float output01[vlen]; for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index a3d0955bd..981bb19e6 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen]; - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; + __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; const float scalar = vlen; const float rbw = 1.7; diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index 618a82a02..c880260f2 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint32_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t input0; - uint32_t output0 __attribute__ ((aligned (16))); - uint32_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t output0; + __VOLK_ATTR_ALIGNED(16) uint32_t output01; input0 = ((uint32_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index 85ef58795..6be4e50ea 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint64_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t input0; - uint64_t output0 __attribute__ ((aligned (16))); - uint64_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t output0; + __VOLK_ATTR_ALIGNED(16) uint64_t output01; input0 = ((uint64_t) (rand() - (RAND_MAX/2))); output0 = 0; |