33 files changed, 113 insertions, 93 deletions
diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a16.h b/volk/include/volk/volk_16ic_magnitude_16i_a16.h
index e75d54ec4..00d29b112 100644
--- a/volk/include/volk/volk_16ic_magnitude_16i_a16.h
+++ b/volk/include/volk/volk_16ic_magnitude_16i_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H
 #define INCLUDED_volk_16ic_magnitude_16i_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <math.h>
@@ -25,8 +26,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co
 
   __m128 cplxValue1, cplxValue2, result;
 
-  float inputFloatBuffer[8] __attribute__((aligned(128)));
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
 
@@ -96,8 +97,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con
 
   __m128 cplxValue1, cplxValue2, iValue, qValue, result;
 
-  float inputFloatBuffer[4] __attribute__((aligned(128)));
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4];
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
 
diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h
index dcb2499fa..a4f0689e5 100644
--- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H
 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl
     __m128 invScalar = _mm_set_ps1(1.0/scalar);
     int16_t* complexVectorPtr = (int16_t*)complexVector;
 
-    float floatBuffer[8] __attribute__((aligned(128)));
+    __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];
 
     for(;number < quarterPoints; number++){
       
diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h
index f21fe77f8..564aa1f5d 100644
--- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -72,7 +73,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer,
   __m128 invScalar = _mm_set_ps1(iScalar);
   int16_t* complexVectorPtr = (int16_t*)complexVector;
 
-  float floatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
 
   for(;number < quarterPoints; number++){
     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h
index 388d2ebcd..637ba9fd0 100644
--- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h
+++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H
 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <math.h>
@@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector,
 
   __m128 cplxValue1, cplxValue2, result;
 
-  float inputFloatBuffer[8] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
 
   for(;number < quarterPoints; number++){
 
@@ -91,7 +92,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector,
 
   __m128 cplxValue1, cplxValue2, result, re, im;
 
-  float inputFloatBuffer[8] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
 
   for(;number < quarterPoints; number++){
     inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a16.h b/volk/include/volk/volk_32f_accumulator_s32f_a16.h
index 6a85e066e..94aff3a49 100644
--- a/volk/include/volk/volk_32f_accumulator_s32f_a16.h
+++ b/volk/include/volk/volk_32f_accumulator_s32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H
 #define INCLUDED_volk_32f_accumulator_s32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -18,7 +19,7 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float*
   const unsigned int quarterPoints = num_points / 4;
 
   const float* aPtr = inputBuffer;
-  float tempBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
   
   __m128 accumulator = _mm_setzero_ps();
   __m128 aVal = _mm_setzero_ps();
diff --git a/volk/include/volk/volk_32f_index_max_16u_a16.h b/volk/include/volk/volk_32f_index_max_16u_a16.h
index 3934d2db7..5c19bfca0 100644
--- a/volk/include/volk/volk_32f_index_max_16u_a16.h
+++ b/volk/include/volk/volk_32f_index_max_16u_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_index_max_16u_a16_H
 #define INCLUDED_volk_32f_index_max_16u_a16_H
 
+#include <volk/volk_attributes.h>
 #include <volk/volk_common.h>
 #include <inttypes.h>
 #include <stdio.h>
@@ -25,8 +26,8 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const
     __m128 compareResults;
     __m128 currentValues;
 
-    float maxValuesBuffer[4] __attribute__((aligned(16)));
-    float maxIndexesBuffer[4] __attribute__((aligned(16)));
+    __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
 
     for(;number < quarterPoints; number++){
 
@@ -83,8 +84,8 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl
     __m128 compareResults;
     __m128 currentValues;
 
-    float maxValuesBuffer[4] __attribute__((aligned(16)));
-    float maxIndexesBuffer[4] __attribute__((aligned(16)));
+    __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
 
     for(;number < quarterPoints; number++){
 
diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h
index 55d4e0319..70ab3ccdb 100644
--- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h
+++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H
 #define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -21,7 +22,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no
   const unsigned int quarterPoints = num_points / 4;
 
   const float* dataPointsPtr = realDataPoints;
-  float avgPointsVector[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4];
     
   __m128 dataPointsVal;
   __m128 avgPointsVal = _mm_setzero_ps();
@@ -87,7 +88,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no
   sumMean += avgPointsVector[3];
 
   // Calculate the number of valid bins from the remaning count
-  float validBinCountVector[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4];
   _mm_store_ps(validBinCountVector, vValidBinCount);
 
   float validBinCount = 0;
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h
index 9d1d0ef4d..71b53ba3a 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H
 #define INCLUDED_volk_32f_s32f_convert_16i_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -63,7 +64,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, cons
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
 
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
     ret = _mm_load_ps(inputVectorPtr);
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
index 06228ef7d..dec3f1611 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
@@ -65,7 +65,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
 
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
     ret = _mm_loadu_ps(inputVectorPtr);
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h
index 82c74bf44..095d7bd35 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H
 #define INCLUDED_volk_32f_s32f_convert_32i_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -59,7 +60,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, cons
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
 
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
     ret = _mm_load_ps(inputVectorPtr);
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
index 253a48ae3..b4e954dc4 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
@@ -61,7 +61,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
 
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
     ret = _mm_loadu_ps(inputVectorPtr);
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h
index 8dab0cdf4..509a46609 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H
 #define INCLUDED_volk_32f_s32f_convert_8i_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -70,7 +71,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
 
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
     ret = _mm_load_ps(inputVectorPtr);
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
index 72b193c9d..1c6bf87c9 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
@@ -72,7 +72,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
   __m128 vScalar = _mm_set_ps1(scalar);
   __m128 ret;
 
-  float outputFloatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
 
   for(;number < quarterPoints; number++){
     ret = _mm_loadu_ps(inputVectorPtr);
diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
index 48d2fe1fe..779ae2d39 100644
--- a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
+++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H
 #define INCLUDED_volk_32f_s32f_stddev_32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <math.h>
@@ -22,7 +23,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa
 
     const float* aPtr = inputBuffer;
 
-    float squareBuffer[4] __attribute__((aligned(128)));
+    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
 
     __m128 squareAccumulator = _mm_setzero_ps();
     __m128 aVal1, aVal2, aVal3, aVal4;
@@ -82,7 +83,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float*
 
     const float* aPtr = inputBuffer;
 
-    float squareBuffer[4] __attribute__((aligned(128)));
+    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
 
     __m128 squareAccumulator = _mm_setzero_ps();
     __m128 aVal = _mm_setzero_ps();
diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h
index f1cb2ae0e..9605322d3 100644
--- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h
+++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <math.h>
@@ -22,8 +23,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo
     const unsigned int sixteenthPoints = num_points / 16;
 
     const float* aPtr = inputBuffer;
-    float meanBuffer[4] __attribute__((aligned(128)));
-    float squareBuffer[4] __attribute__((aligned(128)));
+    __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
 
     __m128 accumulator = _mm_setzero_ps();
     __m128 squareAccumulator = _mm_setzero_ps();
@@ -95,8 +96,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float*
     const unsigned int quarterPoints = num_points / 4;
 
     const float* aPtr = inputBuffer;
-    float meanBuffer[4] __attribute__((aligned(128)));
-    float squareBuffer[4] __attribute__((aligned(128)));
+    __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
+    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
 
     __m128 accumulator = _mm_setzero_ps();
     __m128 squareAccumulator = _mm_setzero_ps();
diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h
index d13f12e51..93151260f 100644
--- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H
 #define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include<stdio.h>
 
 
@@ -53,7 +54,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const  float
     bPtr += 4;
   }
 
-  float dotProductVector[4] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
 
   _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector
 
@@ -102,7 +103,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float
     bPtr += 4;
   }
 
-  float dotProductVector[4] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
   dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal);
 
   _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector
@@ -163,7 +164,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo
     dotProdVal = _mm_add_ps(dotProdVal, cVal1);
   }
 
-  float dotProductVector[4] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
   _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector
 
   dotProduct = dotProductVector[0];
diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
index 7c1136a67..7f47122ff 100644
--- a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
@@ -53,7 +53,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const  float*
     bPtr += 4;
   }
 
-  float dotProductVector[4] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
 
   _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector
 
@@ -102,7 +102,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float *
     bPtr += 4;
   }
 
-  float dotProductVector[4] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
   dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal);
 
   _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector
@@ -163,7 +163,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float
     dotProdVal = _mm_add_ps(dotProdVal, cVal1);
   }
 
-  float dotProductVector[4] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
   _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector
 
   dotProduct = dotProductVector[0];
diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h
index e98735245..cab3db50d 100644
--- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h
+++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -85,7 +86,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe
 
     int16_t* complexVectorPtr = (int16_t*)complexVector;
 
-    float floatBuffer[4] __attribute__((aligned(128)));
+    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
 
     for(;number < quarterPoints; number++){
       iValue = _mm_load_ps(iBufferPtr);
diff --git a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h
index 1e3e61e08..304515a5c 100644
--- a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H
 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -24,7 +25,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer
 
   __m128 cplxValue1, cplxValue2, iValue;
 
-  float floatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
 
   for(;number < quarterPoints; number++){
     cplxValue1 = _mm_load_ps(complexVectorPtr);
diff --git a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h
index 14318ab01..96afa5ae9 100644
--- a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H
 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <math.h>
@@ -25,7 +26,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto
 
   __m128 cplxValue1, cplxValue2, result;
 
-  float floatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
 
   for(;number < quarterPoints; number++){
     cplxValue1 = _mm_load_ps(complexVectorPtr);
@@ -80,7 +81,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector
 
   __m128 cplxValue1, cplxValue2, iValue, qValue, result;
 
-  float floatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
 
   for(;number < quarterPoints; number++){
     cplxValue1 = _mm_load_ps(complexVectorPtr);
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h
index d78faf5b5..78e28c903 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H
 #define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H
 
+#include <volk/volk_attributes.h>
 #include<volk/volk_complex.h>
 #include<stdio.h>
 
@@ -64,7 +65,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* r
 
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
-  static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
+  __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
   
 
 
@@ -205,7 +206,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* resul
 #if LV_HAVE_SSE && LV_HAVE_32
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
-  static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
+  __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
 
   int bound = num_bytes >> 4;
   int leftovers = num_bytes % 16;
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
index 69781f0fb..73576a766 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -66,7 +66,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res
 
 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
 
-  static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
+  __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
 
   union HalfMask {
     uint32_t intRep[4];
diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h
index b7b9768ab..d404ee684 100644
--- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H
 #define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H
 
+#include <volk/volk_attributes.h>
 #include <volk/volk_complex.h>
 #include <stdio.h>
 #include <string.h>
@@ -358,7 +359,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const
     b += 2;
   }
 
-  lv_32fc_t dotProductVector[2] __attribute__((aligned(16)));
+  __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
 
   _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
 
diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h
index 80032d2fe..d68d2462a 100644
--- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
 #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -95,7 +96,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo
   __m128 invScalar = _mm_set_ps1(1.0/scalar);
   int8_t* complexVectorPtr = (int8_t*)complexVector;
 
-  float floatBuffer[8] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];
 
   for(;number < quarterPoints; number++){
     floatBuffer[0] = (float)(complexVectorPtr[0]);
diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h
index 47a968ac1..d2cfa42f6 100644
--- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
 
+#include <volk/volk_attributes.h>
 #include <inttypes.h>
 #include <stdio.h>
 
@@ -81,7 +82,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c
   __m128 invScalar = _mm_set_ps1(iScalar);
   int8_t* complexVectorPtr = (int8_t*)complexVector;
 
-  float floatBuffer[4] __attribute__((aligned(128)));
+  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
 
   for(;number < quarterPoints; number++){
     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc
index 154aa0f17..5d5eb7e18 100644
--- a/volk/lib/qa_16s_add_quad_aligned16.cc
+++ b/volk/lib/qa_16s_add_quad_aligned16.cc
@@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() {
   double total;
   const int vlen = 3200;
   const int ITERS = 100000;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short input1[vlen] __attribute__ ((aligned (16)));
-  short input2[vlen] __attribute__ ((aligned (16)));
-  short input3[vlen] __attribute__ ((aligned (16)));
-  short input4[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) short input0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input1[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input2[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input3[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input4[vlen];
   
-  short output0[vlen] __attribute__ ((aligned (16)));
-  short output1[vlen] __attribute__ ((aligned (16)));
-  short output2[vlen] __attribute__ ((aligned (16)));
-  short output3[vlen] __attribute__ ((aligned (16)));
-  short output01[vlen] __attribute__ ((aligned (16)));
-  short output11[vlen] __attribute__ ((aligned (16)));
-  short output21[vlen] __attribute__ ((aligned (16)));
-  short output31[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) short output0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output1[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output2[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output3[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output01[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output11[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output21[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output31[vlen];
 
   for(int i = 0; i < vlen; ++i) {
     short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
index 62deffaeb..2e6e6a1a0 100644
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
   clock_t start, end;
   double total;
   
-  short target[vlen] __attribute__ ((aligned (16)));
-  short target2[vlen] __attribute__ ((aligned (16)));
-  short target3[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) short target[vlen];
+  __VOLK_ATTR_ALIGNED(16) short target2[vlen];
+  __VOLK_ATTR_ALIGNED(16) short target3[vlen];
   
-  short src0[vlen] __attribute__ ((aligned (16)));
-  short permute_indexes[vlen] __attribute__ ((aligned (16))) =  {
+  __VOLK_ATTR_ALIGNED(16) short src0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] =  {
 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
-  short cntl0[vlen] __attribute__ ((aligned (16))) = {
+  __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
     0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
-  short cntl1[vlen] __attribute__ ((aligned (16))) = {
+  __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
     0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
-  short cntl2[vlen] __attribute__ ((aligned (16))) = {
+  __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
     0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
-  short cntl3[vlen] __attribute__ ((aligned (16))) =  {
+  __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] =  {
     0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
-  short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+  __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
   
   
 
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
index 819b2256b..3cd4e906d 100644
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
@@ -23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
   clock_t start, end;
   double total;
   
-  short target[vlen] __attribute__ ((aligned (16)));
-  short target2[vlen] __attribute__ ((aligned (16)));
-  short src0[vlen] __attribute__ ((aligned (16)));
-  short permute_indexes[vlen] __attribute__ ((aligned (16)));
-  short cntl0[vlen] __attribute__ ((aligned (16)));
-  short cntl1[vlen] __attribute__ ((aligned (16)));
-  short cntl2[vlen] __attribute__ ((aligned (16)));
-  short cntl3[vlen] __attribute__ ((aligned (16)));
-  short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+  __VOLK_ATTR_ALIGNED(16) short target[vlen];
+  __VOLK_ATTR_ALIGNED(16) short target2[vlen];
+  __VOLK_ATTR_ALIGNED(16) short src0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
+  __VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
+  __VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
+  __VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
+  __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
 
   for(int i = 0; i < vlen; ++i) {
     src0[i] = i;
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc
index 66f8c9afa..192a69e35 100644
--- a/volk/lib/qa_16s_quad_max_star_aligned16.cc
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc
@@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() {
 void qa_16s_quad_max_star_aligned16::t1() {
   const int vlen = 34;
   
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short input1[vlen] __attribute__ ((aligned (16)));
-  short input2[vlen] __attribute__ ((aligned (16)));
-  short input3[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) short input0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input1[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input2[vlen];
+  __VOLK_ATTR_ALIGNED(16) short input3[vlen];
 
-  short output0[vlen] __attribute__ ((aligned (16)));
-  short output1[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) short output0[vlen];
+  __VOLK_ATTR_ALIGNED(16) short output1[vlen];
 
   for(int i = 0; i < vlen; ++i) {
     short plus0 = (short) (rand() - (RAND_MAX/2));
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc
index 592304f83..a2e7a85be 100644
--- a/volk/lib/qa_32f_fm_detect_aligned16.cc
+++ b/volk/lib/qa_32f_fm_detect_aligned16.cc
@@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() {
   double total;
   const int vlen = 3201;
   const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) float input0[vlen];
   
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) float output0[vlen];
+  __VOLK_ATTR_ALIGNED(16) float output01[vlen];
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
index a3d0955bd..981bb19e6 100644
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
@@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
   double total;
   const int vlen = 3201;
   const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
   
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
+  __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
 
   const float scalar = vlen;
   const float rbw = 1.7;
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc
index 618a82a02..c880260f2 100644
--- a/volk/lib/qa_32u_popcnt_aligned16.cc
+++ b/volk/lib/qa_32u_popcnt_aligned16.cc
@@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() {
   double total;
 
   const int ITERS = 10000000;
-  uint32_t input0 __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) uint32_t input0;
   
-  uint32_t output0 __attribute__ ((aligned (16)));
-  uint32_t output01 __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) uint32_t output0;
+  __VOLK_ATTR_ALIGNED(16) uint32_t output01;
 
     input0 = ((uint32_t) (rand() - (RAND_MAX/2)));
     output0 = 0;
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc
index 85ef58795..6be4e50ea 100644
--- a/volk/lib/qa_64u_popcnt_aligned16.cc
+++ b/volk/lib/qa_64u_popcnt_aligned16.cc
@@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() {
   double total;
 
   const int ITERS = 10000000;
-  uint64_t input0 __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) uint64_t input0;
   
-  uint64_t output0 __attribute__ ((aligned (16)));
-  uint64_t output01 __attribute__ ((aligned (16)));
+  __VOLK_ATTR_ALIGNED(16) uint64_t output0;
+  __VOLK_ATTR_ALIGNED(16) uint64_t output01;
 
     input0 = ((uint64_t) (rand() - (RAND_MAX/2)));
     output0 = 0;