116 files changed, 859 insertions, 778 deletions
diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt
index f333d939b..22c09b3f8 100644
--- a/volk/CMakeLists.txt
+++ b/volk/CMakeLists.txt
@@ -69,6 +69,7 @@ INSTALL(FILES
     ${CMAKE_BINARY_DIR}/include/volk/volk.h
     ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h
     ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h
+    ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h
 DESTINATION include/volk)
 
 ########################################################################
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index fd9507207..b9ac9ecc2 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -14,93 +14,93 @@ int main(int argc, char *argv[]) {
 
     std::vector<std::string> results;
     
-    //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results);
-    //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results);
-    VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results);
-    VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results);
+    //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000, &results);
+    //VOLK_PROFILE(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000, &results);
+    VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_magnitude_16i_a, 1, 0, 204600, 100, &results);
+    VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 204600, 10000, &results);
     VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_convert_8i_a, 0, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results);
-    //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results);
-    //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results);
-    VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results);
-    VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results);
-    //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_16i_max_star_16i_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a, 0, 0, 204600, 10000, &results);
+    //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 10000, &results);
+    //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_16u_byteswap_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_accumulator_s32f_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_x2_add_32f_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 204600, 50, &results);
+    VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 204600, 100, &results);
+    //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000, &results);
     VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results);
-    VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results);
-    VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_index_max_16u_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 204600, 100, &results);
+    VOLK_PROFILE(volk_32fc_magnitude_32f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_convert_64f_a, 1e-4, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_8i_a, 1, 128, 204600, 10000, &results);
     VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results);
-    //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results);
-    VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results);
-    VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results);
-    VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results);
+    //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 20460, 100, &results);
+    VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_x2_divide_32f_a, 1e-4, 0, 204600, 2000, &results);
+    VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 204600, 5000, &results);
     VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results);
-    //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results);
-    VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results);
-    VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results);
-    VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results);
-    VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results);
-    VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results);
-    VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results);
-    VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results);
-    VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results);
-    VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results);
-    VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results);
-    VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results);
-    VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results);
+    //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000, &results);
+    VOLK_PROFILE(volk_32f_index_max_16u_a, 0, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 204600, 3000, &results);
+    VOLK_PROFILE(volk_32f_x2_interleave_32fc_a, 0, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x2_max_32f_a, 1e-4, 0, 204600, 2000, &results);
+    VOLK_PROFILE(volk_32f_x2_min_32f_a, 1e-4, 0, 204600, 2000, &results);
+    VOLK_PROFILE(volk_32f_x2_multiply_32f_a, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_normalize_a, 1e-4, 100, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_power_32f_a, 1e-4, 4, 204600, 100, &results);
+    VOLK_PROFILE(volk_32f_sqrt_32f_a, 1e-4, 0, 204600, 100, &results);
+    VOLK_PROFILE(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 204600, 3000, &results);
+    VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 204600, 3000, &results);
+    VOLK_PROFILE(volk_32f_x2_subtract_32f_a, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32i_x2_and_32i_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32i_s32f_convert_32f_a, 1e-4, 100, 204600, 10000, &results);
     VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results);
-    //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results);
-    VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32i_x2_or_32i_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32u_byteswap_a, 0, 0, 204600, 2000, &results);
+    //VOLK_PROFILE(volk_32u_popcnt_a, 0, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_64f_convert_32f_a, 1e-4, 0, 204600, 10000, &results);
     VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results);
-    VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results);
-    //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results);
-    VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results);
-    VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results);
-    VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results);
-    VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results);
-    VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results);
-    VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results);
-    VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results);
-    VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results);
+    VOLK_PROFILE(volk_64f_x2_max_64f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_64f_x2_min_64f_a, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_64u_byteswap_a, 0, 0, 204600, 1000, &results);
+    //VOLK_PROFILE(volk_64u_popcnt_a, 0, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a, 0, 0, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a, 0, 256, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 204600, 400, &results);
+    VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 204600, 400, &results);
+    VOLK_PROFILE(volk_8i_convert_16i_a, 0, 0, 204600, 20000, &results);
     VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results);
-    VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results);
+    VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results);
     VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results);
 
     char path[256];
diff --git a/volk/gen/archs.xml b/volk/gen/archs.xml
index 977cc7924..f6822871f 100644
--- a/volk/gen/archs.xml
+++ b/volk/gen/archs.xml
@@ -7,6 +7,12 @@
 
 <arch name="altivec" type="powerpc">
   <flag>maltivec</flag>
+  <alignment>16</alignment>
+</arch>
+
+<arch name="neon" type="arm">
+  <flag>mfpu=neon -mfloat-abi=softfp -funsafe-math-optimizations</flag>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="32" type="x86" no_test="true" >
@@ -31,6 +37,7 @@
   <shift>31</shift>
   <flag>m3dnow</flag>
   <val>1</val>
+  <alignment>8</alignment>
 </arch>
 
 <arch name="abm" type="x86">
@@ -39,6 +46,7 @@
   <reg>d</reg>
   <shift>5</shift>
   <flag>sse4.2</flag>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="popcount" type="x86">
@@ -55,6 +63,7 @@
   <reg>d</reg>
   <shift>23</shift>
   <flag>mmmx</flag>
+  <alignment>8</alignment>
 </arch>
 
 
@@ -66,6 +75,7 @@
   <flag>msse</flag>
   <environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
   <include>xmmintrin.h</include>
+  <alignment>16</alignment>
 </arch>
 
 
@@ -75,6 +85,7 @@
   <reg>d</reg>
   <shift>26</shift>
   <flag>msse2</flag>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="orc" type="all">
@@ -91,6 +102,7 @@
   <flag>msse3</flag>
   <environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment>
   <include>pmmintrin.h</include>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="ssse3" type="x86">
@@ -99,6 +111,7 @@
   <reg>c</reg>
   <shift>9</shift>
   <flag>mssse3</flag>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="sse4_a" type="x86">
@@ -107,6 +120,7 @@
   <reg>c</reg>
   <shift>6</shift>
   <flag>msse4a</flag>
+  <alignment>16</alignment>
 </arch>
 
 
@@ -116,6 +130,7 @@
   <reg>c</reg>
   <shift>19</shift>
   <flag>msse4.1</flag>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="sse4_2" type="x86">
@@ -124,6 +139,7 @@
   <reg>c</reg>
   <shift>20</shift>
   <flag>msse4.2</flag>
+  <alignment>16</alignment>
 </arch>
 
 <arch name="avx" type="x86">
@@ -132,6 +148,7 @@
   <reg>c</reg>
   <shift>28</shift>
   <flag>mavx</flag>
+  <alignment>32</alignment>
 </arch>
 
 </grammar>
diff --git a/volk/gen/machines.xml b/volk/gen/machines.xml
index 1f8b201ae..b872b9fb1 100644
--- a/volk/gen/machines.xml
+++ b/volk/gen/machines.xml
@@ -20,6 +20,11 @@ This machine is intended to support the MSVC compiler on x86/amd64.
 The MSVC compiler has intrinsic support for SSE and SSE2,
 however it does not support the gcc style inline assembly.
 -->
+
+<machine name="neon">
+<archs>generic neon</archs>
+</machine>
+
 <machine name="sse2_only">
 <archs>generic mmx sse sse2</archs>
 </machine>
diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py
index e946152d0..19d679e71 100644
--- a/volk/gen/make_c.py
+++ b/volk/gen/make_c.py
@@ -66,6 +66,10 @@ struct volk_machine *get_machine(void) {
     }
 }
 
+unsigned int volk_get_alignment(void) {
+    return get_machine()->alignment;
+}
+
 """
     
     for i in range(len(functions)):
diff --git a/volk/gen/make_cpuid_c.py b/volk/gen/make_cpuid_c.py
index 2fdbaf304..eb88dcd7f 100644
--- a/volk/gen/make_cpuid_c.py
+++ b/volk/gen/make_cpuid_c.py
@@ -153,6 +153,45 @@ int i_can_has_%s () {
 }
 
 """ % (arch)
+
+        elif str(domarch.attributes["type"].value) == "arm":
+            arch = str(domarch.attributes["name"].value);
+            tempstring = tempstring + """\
+#if defined(__arm__) && defined(__linux__)
+#include <asm/hwcap.h>
+#include <linux/auxvec.h>
+#include <stdio.h>
+#define LOOK_FOR_NEON
+#endif
+
+int i_can_has_%s () {
+//Runtime NEON check: scan /proc/self/auxv for an AT_HWCAP entry with HWCAP_NEON set.
+//Linux-on-ARM only; without LOOK_FOR_NEON this reports 0 (not available).
+
+#ifdef LOOK_FOR_NEON
+    FILE *auxvec_f;
+    unsigned long auxvec[2];
+    unsigned int found_neon = 0;
+    auxvec_f = fopen("/proc/self/auxv", "rb");
+    if(!auxvec_f) return 0;
+    
+    //auxv is read as (id, value) pairs of unsigned long.
+    //Stop on the first short read (EOF or error) so a missing NEON bit cannot loop forever.
+    while(!found_neon) {
+      if(fread(auxvec, sizeof(unsigned long), 2, auxvec_f) != 2) break;
+      if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
+        found_neon = 1;
+    }
+    
+    fclose(auxvec_f);
+    return found_neon;
+
+#else
+    return 0;
+#endif
+}
+
+""" % (arch)
         
         elif str(domarch.attributes["type"].value) == "all":
             arch = str(domarch.attributes["name"].value);
diff --git a/volk/gen/make_each_machine_c.py b/volk/gen/make_each_machine_c.py
index d4f5f01de..44e2ef3f2 100644
--- a/volk/gen/make_each_machine_c.py
+++ b/volk/gen/make_each_machine_c.py
@@ -18,7 +18,7 @@
 from volk_regexp import *
 import string
 
-def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist):
+def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist, alignment):
 
     #make the machine fcountlist and taglist a subset given the archs list
     machine_fcountlists = list()
@@ -38,6 +38,7 @@ def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglis
     tempstring += "struct volk_machine volk_machine_" + machine_name + " = {\n"
     tempstring += "    " + ' | '.join(["(1 << LV_" + arch.swapcase() + ")" for arch in archs]) + ",\n"
     tempstring += "    \"%s\",\n"%machine_name
+    tempstring += "    %s,\n"%alignment
 
     #fill in the description for each function
     for i in range(len(functions)):
@@ -51,8 +52,7 @@ def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglis
     tempstring += "};\n"
     return tempstring
 
-def make_each_machine_c(machine_name, archs, functions, fcountlist, taglist):
-
+def make_each_machine_c(machine_name, archs, functions, fcountlist, taglist, alignment):
     tempstring = r"""
 // This file is automatically generated by make_each_machine_c.py.
 // Do not edit this file.
@@ -77,8 +77,8 @@ def make_each_machine_c(machine_name, archs, functions, fcountlist, taglist):
 %s
 #endif
 """%(
-    _make_each_machine_struct(machine_name, archs+["orc"], functions, fcountlist, taglist),
-    _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist)
+    _make_each_machine_struct(machine_name, archs+["orc"], functions, fcountlist, taglist, alignment),
+    _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist, alignment)
 )
 
     return tempstring
diff --git a/volk/gen/make_h.py b/volk/gen/make_h.py
index 3d5790de4..354e57258 100644
--- a/volk/gen/make_h.py
+++ b/volk/gen/make_h.py
@@ -21,9 +21,12 @@ struct volk_func_desc {
      const int *arch_defs;
      const int n_archs;
 };
+
+VOLK_API unsigned int volk_get_alignment(void);
+
 """
     for i in range(len(funclist)):
-        tempstring += "extern " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + ";\n"
+        tempstring += "extern VOLK_API " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + ";\n"
         tempstring += "extern VOLK_API void %s_manual%s;\n" % (funclist[i], arched_arglist[i])
         tempstring = strip_trailing(tempstring, " {")
         tempstring += "extern VOLK_API struct volk_func_desc %s_get_func_desc(void);\n" % (funclist[i])
diff --git a/volk/gen/make_machines_h.py b/volk/gen/make_machines_h.py
index 563de18a6..a48caa89c 100644
--- a/volk/gen/make_machines_h.py
+++ b/volk/gen/make_machines_h.py
@@ -33,6 +33,7 @@ __VOLK_DECL_BEGIN
 struct volk_machine {
    const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format)
    const char *name;
+   const unsigned int alignment; //the maximum byte alignment required for functions in this library
 """
     for function in functions:
         tempstring += "    const char *%s_name;\n"%function
diff --git a/volk/gen/volk_register.py b/volk/gen/volk_register.py
index 62ebba3c2..75e5eeb87 100644
--- a/volk/gen/volk_register.py
+++ b/volk/gen/volk_register.py
@@ -59,7 +59,7 @@ functions = [];
 
 
 for line in mfile:
-    subline = re.search(".*_(a16|u)\.h.*", line);
+    subline = re.search(".*_(a|u)\.h.*", line);
     if subline:
         subsubline = re.search("(?<=volk_).*", subline.group(0));
         if subsubline:
@@ -74,7 +74,7 @@ datatypes = set(datatypes);
 for line in mfile:
     for dt in datatypes:
         if dt in line:
-            subline = re.search("(volk_" + dt +"_.*(a16|u).*\.h)", line);
+            subline = re.search("(volk_" + dt +"_.*(a|u).*\.h)", line);
             if subline:
                 
                 subsubline = re.search(".+(?=\.h)", subline.group(0));
@@ -97,6 +97,12 @@ archflags_dict = {}
 for filearch in filearchs:
     archflags_dict[str(filearch.attributes["name"].value)] = str(filearch.getElementsByTagName("flag")[0].firstChild.data)
 
+archalign_dict = {}  # arch name -> integer value of its <alignment> element
+for filearch in filearchs:
+    alignelem = filearch.getElementsByTagName("alignment")
+    if(alignelem):  # archs with no <alignment> element get no entry in the dict
+        archalign_dict[str(filearch.attributes["name"].value)] = int(alignelem[0].firstChild.data)
+
 archs_or = "("
 for arch in archs:
     archs_or = archs_or + string.upper(arch) + "|";
@@ -132,11 +138,14 @@ for machine_name in machine_str_dict:
 
     if not already_done:
         machines[machine_name] = marchlist
- 
-#for machine_name in machines:
-#    print machine_name + ": " + str(machines[machine_name])
 
-#ok, now we have all the machines we're going to build. next step is to generate a Makefile.am where they're all laid out and compiled
+#get the maximum alignment for all archs in a machine
+machine_alignment_dict = {}  # machine name -> largest alignment among its archs
+for machine in machines:
+    machine_alignment_dict[machine] = max((archalign_dict.get(k, 1)) for k in machines[machine])  # archs missing from archalign_dict count as 1
+
+#for machine in machine_alignment_dict:
+#    print machine + ": %d" % machine_alignment_dict[machine]
 
 taglist = [];
 fcountlist = [];
@@ -295,5 +304,5 @@ outfile_h.close()
 for machine in machines:
     machine_c_filename = os.path.join(gendir, "lib/volk_machine_" + machine + ".c")
     outfile_machine_c = open(machine_c_filename, "w")
-    outfile_machine_c.write(make_each_machine_c(machine, machines[machine], functions, fcountlist, taglist))
+    outfile_machine_c.write(make_each_machine_c(machine, machines[machine], functions, fcountlist, taglist, machine_alignment_dict[machine]))
     outfile_machine_c.close()
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index 5f9e134bc..b7da9b37c 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -33,91 +33,91 @@ volkinclude_HEADERS = \
 	$(top_gendir)/include/volk/volk_typedefs.h \
 	$(top_gendir)/include/volk/volk.h \
 	$(top_gendir)/include/volk/volk_cpu.h \
-	volk_16i_x5_add_quad_16i_x4_a16.h \
-	volk_16i_branch_4_state_8_a16.h \
-	volk_16ic_deinterleave_16i_x2_a16.h \
-	volk_16ic_s32f_deinterleave_32f_x2_a16.h \
-	volk_16ic_deinterleave_real_16i_a16.h \
-	volk_16ic_s32f_deinterleave_real_32f_a16.h \
-	volk_16ic_deinterleave_real_8i_a16.h \
-	volk_16ic_magnitude_16i_a16.h \
-	volk_16ic_s32f_magnitude_32f_a16.h \
-	volk_16i_s32f_convert_32f_a16.h \
+	volk_16i_x5_add_quad_16i_x4_a.h \
+	volk_16i_branch_4_state_8_a.h \
+	volk_16ic_deinterleave_16i_x2_a.h \
+	volk_16ic_s32f_deinterleave_32f_x2_a.h \
+	volk_16ic_deinterleave_real_16i_a.h \
+	volk_16ic_s32f_deinterleave_real_32f_a.h \
+	volk_16ic_deinterleave_real_8i_a.h \
+	volk_16ic_magnitude_16i_a.h \
+	volk_16ic_s32f_magnitude_32f_a.h \
+	volk_16i_s32f_convert_32f_a.h \
 	volk_16i_s32f_convert_32f_u.h \
-	volk_16i_convert_8i_a16.h \
+	volk_16i_convert_8i_a.h \
 	volk_16i_convert_8i_u.h \
-	volk_16i_max_star_16i_a16.h \
-	volk_16i_max_star_horizontal_16i_a16.h \
-	volk_16i_permute_and_scalar_add_a16.h \
-	volk_16i_x4_quad_max_star_16i_a16.h \
-	volk_16u_byteswap_a16.h \
-	volk_32f_accumulator_s32f_a16.h \
-	volk_32f_x2_add_32f_a16.h \
-	volk_32fc_32f_multiply_32fc_a16.h \
-	volk_32fc_s32f_power_32fc_a16.h \
-	volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h \
-	volk_32fc_s32f_atan2_32f_a16.h \
-	volk_32fc_x2_conjugate_dot_prod_32fc_a16.h \
+	volk_16i_max_star_16i_a.h \
+	volk_16i_max_star_horizontal_16i_a.h \
+	volk_16i_permute_and_scalar_add_a.h \
+	volk_16i_x4_quad_max_star_16i_a.h \
+	volk_16u_byteswap_a.h \
+	volk_32f_accumulator_s32f_a.h \
+	volk_32f_x2_add_32f_a.h \
+	volk_32fc_32f_multiply_32fc_a.h \
+	volk_32fc_s32f_power_32fc_a.h \
+	volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \
+	volk_32fc_s32f_atan2_32f_a.h \
+	volk_32fc_x2_conjugate_dot_prod_32fc_a.h \
 	volk_32fc_x2_conjugate_dot_prod_32fc_u.h \
-	volk_32fc_deinterleave_32f_x2_a16.h \
-	volk_32fc_deinterleave_64f_x2_a16.h \
-	volk_32fc_s32f_deinterleave_real_16i_a16.h \
-	volk_32fc_deinterleave_real_32f_a16.h \
-	volk_32fc_deinterleave_real_64f_a16.h \
-	volk_32fc_x2_dot_prod_32fc_a16.h \
-	volk_32fc_index_max_16u_a16.h \
-	volk_32fc_s32f_magnitude_16i_a16.h \
-	volk_32fc_magnitude_32f_a16.h \
-	volk_32fc_x2_multiply_32fc_a16.h \
-	volk_32f_s32f_convert_16i_a16.h \
+	volk_32fc_deinterleave_32f_x2_a.h \
+	volk_32fc_deinterleave_64f_x2_a.h \
+	volk_32fc_s32f_deinterleave_real_16i_a.h \
+	volk_32fc_deinterleave_real_32f_a.h \
+	volk_32fc_deinterleave_real_64f_a.h \
+	volk_32fc_x2_dot_prod_32fc_a.h \
+	volk_32fc_index_max_16u_a.h \
+	volk_32fc_s32f_magnitude_16i_a.h \
+	volk_32fc_magnitude_32f_a.h \
+	volk_32fc_x2_multiply_32fc_a.h \
+	volk_32f_s32f_convert_16i_a.h \
 	volk_32f_s32f_convert_16i_u.h \
-	volk_32f_s32f_convert_32i_a16.h \
+	volk_32f_s32f_convert_32i_a.h \
 	volk_32f_s32f_convert_32i_u.h \
-	volk_32f_convert_64f_a16.h \
+	volk_32f_convert_64f_a.h \
 	volk_32f_convert_64f_u.h \
-	volk_32f_s32f_convert_8i_a16.h \
+	volk_32f_s32f_convert_8i_a.h \
 	volk_32f_s32f_convert_8i_u.h \
-	volk_32fc_s32f_x2_power_spectral_density_32f_a16.h \
-	volk_32fc_s32f_power_spectrum_32f_a16.h \
-	volk_32fc_x2_square_dist_32f_a16.h \
-	volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h \
-	volk_32f_x2_divide_32f_a16.h \
-	volk_32f_x2_dot_prod_32f_a16.h \
+	volk_32fc_s32f_x2_power_spectral_density_32f_a.h \
+	volk_32fc_s32f_power_spectrum_32f_a.h \
+	volk_32fc_x2_square_dist_32f_a.h \
+	volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h \
+	volk_32f_x2_divide_32f_a.h \
+	volk_32f_x2_dot_prod_32f_a.h \
 	volk_32f_x2_dot_prod_32f_u.h \
-	volk_32f_s32f_32f_fm_detect_32f_a16.h \
-	volk_32f_index_max_16u_a16.h \
-	volk_32f_x2_s32f_interleave_16ic_a16.h \
-	volk_32f_x2_interleave_32fc_a16.h \
-	volk_32f_x2_max_32f_a16.h \
-	volk_32f_x2_min_32f_a16.h \
-	volk_32f_x2_multiply_32f_a16.h \
-	volk_32f_s32f_normalize_a16.h \
-	volk_32f_s32f_power_32f_a16.h \
-	volk_32f_sqrt_32f_a16.h \
-	volk_32f_s32f_stddev_32f_a16.h \
-	volk_32f_stddev_and_mean_32f_x2_a16.h \
-	volk_32f_x2_subtract_32f_a16.h \
-	volk_32f_x3_sum_of_poly_32f_a16.h \
-	volk_32i_x2_and_32i_a16.h \
-	volk_32i_s32f_convert_32f_a16.h \
+	volk_32f_s32f_32f_fm_detect_32f_a.h \
+	volk_32f_index_max_16u_a.h \
+	volk_32f_x2_s32f_interleave_16ic_a.h \
+	volk_32f_x2_interleave_32fc_a.h \
+	volk_32f_x2_max_32f_a.h \
+	volk_32f_x2_min_32f_a.h \
+	volk_32f_x2_multiply_32f_a.h \
+	volk_32f_s32f_normalize_a.h \
+	volk_32f_s32f_power_32f_a.h \
+	volk_32f_sqrt_32f_a.h \
+	volk_32f_s32f_stddev_32f_a.h \
+	volk_32f_stddev_and_mean_32f_x2_a.h \
+	volk_32f_x2_subtract_32f_a.h \
+	volk_32f_x3_sum_of_poly_32f_a.h \
+	volk_32i_x2_and_32i_a.h \
+	volk_32i_s32f_convert_32f_a.h \
 	volk_32i_s32f_convert_32f_u.h \
-	volk_32i_x2_or_32i_a16.h \
-	volk_32u_byteswap_a16.h \
-	volk_32u_popcnt_a16.h \
-	volk_64f_convert_32f_a16.h \
+	volk_32i_x2_or_32i_a.h \
+	volk_32u_byteswap_a.h \
+	volk_32u_popcnt_a.h \
+	volk_64f_convert_32f_a.h \
 	volk_64f_convert_32f_u.h \
-	volk_64f_x2_max_64f_a16.h \
-	volk_64f_x2_min_64f_a16.h \
-	volk_64u_byteswap_a16.h \
-	volk_64u_popcnt_a16.h \
-	volk_8ic_deinterleave_16i_x2_a16.h \
-	volk_8ic_s32f_deinterleave_32f_x2_a16.h \
-	volk_8ic_deinterleave_real_16i_a16.h \
-	volk_8ic_s32f_deinterleave_real_32f_a16.h \
-	volk_8ic_deinterleave_real_8i_a16.h \
-	volk_8ic_x2_multiply_conjugate_16ic_a16.h \
-	volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h \
-	volk_8i_convert_16i_a16.h \
+	volk_64f_x2_max_64f_a.h \
+	volk_64f_x2_min_64f_a.h \
+	volk_64u_byteswap_a.h \
+	volk_64u_popcnt_a.h \
+	volk_8ic_deinterleave_16i_x2_a.h \
+	volk_8ic_s32f_deinterleave_32f_x2_a.h \
+	volk_8ic_deinterleave_real_16i_a.h \
+	volk_8ic_s32f_deinterleave_real_32f_a.h \
+	volk_8ic_deinterleave_real_8i_a.h \
+	volk_8ic_x2_multiply_conjugate_16ic_a.h \
+	volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h \
+	volk_8i_convert_16i_a.h \
 	volk_8i_convert_16i_u.h \
-	volk_8i_s32f_convert_32f_a16.h \
+	volk_8i_s32f_convert_32f_a.h \
 	volk_8i_s32f_convert_32f_u.h 
diff --git a/volk/include/volk/volk_16i_branch_4_state_8_a16.h b/volk/include/volk/volk_16i_branch_4_state_8_a.h
index 5eb03b346..0424e66e9 100644
--- a/volk/include/volk/volk_16i_branch_4_state_8_a16.h
+++ b/volk/include/volk/volk_16i_branch_4_state_8_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_branch_4_state_8_a16_H
-#define INCLUDED_volk_16i_branch_4_state_8_a16_H
+#ifndef INCLUDED_volk_16i_branch_4_state_8_a_H
+#define INCLUDED_volk_16i_branch_4_state_8_a_H
 
 
 #include<inttypes.h>
@@ -14,7 +14,7 @@
 #include<emmintrin.h>
 #include<tmmintrin.h>
 
-static inline  void volk_16i_branch_4_state_8_a16_ssse3(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
+static inline  void volk_16i_branch_4_state_8_a_ssse3(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
 	
   
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11;
@@ -138,7 +138,7 @@ static inline  void volk_16i_branch_4_state_8_a16_ssse3(short* target,  short* s
 #endif /*LV_HAVE_SSEs*/
 
 #ifdef LV_HAVE_GENERIC
-static inline  void volk_16i_branch_4_state_8_a16_generic(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
+static inline  void volk_16i_branch_4_state_8_a_generic(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
 	int i = 0;
 	
 	int bound = 4;
@@ -191,4 +191,4 @@ static inline  void volk_16i_branch_4_state_8_a16_generic(short* target,  short*
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_16i_branch_4_state_8_a16_H*/
+#endif /*INCLUDED_volk_16i_branch_4_state_8_a_H*/
diff --git a/volk/include/volk/volk_16i_convert_8i_a16.h b/volk/include/volk/volk_16i_convert_8i_a.h
index 4d51e5903..8046035c7 100644
--- a/volk/include/volk/volk_16i_convert_8i_a16.h
+++ b/volk/include/volk/volk_16i_convert_8i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_convert_8i_a16_H
-#define INCLUDED_volk_16i_convert_8i_a16_H
+#ifndef INCLUDED_volk_16i_convert_8i_a_H
+#define INCLUDED_volk_16i_convert_8i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param outputVector The 8 bit output data buffer
   \param num_points The number of data values to be converted
 */
-static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
+static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
     
@@ -52,7 +52,7 @@ static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int1
   \param outputVector The 8 bit output data buffer
   \param num_points The number of data values to be converted
 */
-static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
+static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
   int8_t* outputVectorPtr = outputVector;
   const int16_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -66,4 +66,4 @@ static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const i
 
 
 
-#endif /* INCLUDED_volk_16i_convert_8i_a16_H */
+#endif /* INCLUDED_volk_16i_convert_8i_a_H */
diff --git a/volk/include/volk/volk_16i_max_star_16i_a16.h b/volk/include/volk/volk_16i_max_star_16i_a.h
index 063444279..6a4f63708 100644
--- a/volk/include/volk/volk_16i_max_star_16i_a16.h
+++ b/volk/include/volk/volk_16i_max_star_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_max_star_16i_a16_H
-#define INCLUDED_volk_16i_max_star_16i_a16_H
+#ifndef INCLUDED_volk_16i_max_star_16i_a_H
+#define INCLUDED_volk_16i_max_star_16i_a_H
 
 
 #include<inttypes.h>
@@ -12,7 +12,7 @@
 #include<emmintrin.h>
 #include<tmmintrin.h>
 
-static inline  void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, unsigned int num_bytes) {
+static inline  void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes) {
 
 
   
@@ -87,7 +87,7 @@ static inline  void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0,
 
 #ifdef LV_HAVE_GENERIC
 
-static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -105,4 +105,4 @@ static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0,
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_16i_max_star_16i_a16_H*/
+#endif /*INCLUDED_volk_16i_max_star_16i_a_H*/
diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
index ece6adb40..f60b33a41 100644
--- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a16_H
-#define INCLUDED_volk_16i_max_star_horizontal_16i_a16_H
+#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
+#define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
 
 
 #include<inttypes.h>
@@ -12,7 +12,7 @@
 #include<emmintrin.h>
 #include<tmmintrin.h>
 
-static inline  void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) {
+static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) {
 
   const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
   const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d};
@@ -110,7 +110,7 @@ static inline  void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target,
 
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_horizontal_16i_a_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -127,4 +127,4 @@ static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target,
 
 #endif /*LV_HAVE_GENERIC*/
 
-#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a16_H*/
+#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a_H*/
diff --git a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
index ae1a18157..de36cee80 100644
--- a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h
+++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a16_H
-#define INCLUDED_volk_16i_permute_and_scalar_add_a16_H
+#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a_H
+#define INCLUDED_volk_16i_permute_and_scalar_add_a_H
 
 
 #include<inttypes.h>
@@ -13,7 +13,7 @@
 #include<xmmintrin.h>
 #include<emmintrin.h>
 
-static inline  void volk_16i_permute_and_scalar_add_a16_sse2(short* target,  short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
+static inline  void volk_16i_permute_and_scalar_add_a_sse2(short* target,  short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
 	
 
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
@@ -117,7 +117,7 @@ static inline  void volk_16i_permute_and_scalar_add_a16_sse2(short* target,  sho
 
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
+static inline void volk_16i_permute_and_scalar_add_a_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -136,4 +136,4 @@ static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, sh
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a16_H*/
+#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a_H*/
diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a.h
index 09bc252f0..0555fdf00 100644
--- a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h
+++ b/volk/include/volk/volk_16i_s32f_convert_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_s32f_convert_32f_a16_H
-#define INCLUDED_volk_16i_s32f_convert_32f_a16_H
+#ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H
+#define INCLUDED_volk_16i_s32f_convert_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int eighthPoints = num_points / 8;
     
@@ -68,7 +68,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, con
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -102,7 +102,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int16_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -116,4 +116,4 @@ static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, co
 
 
 
-#endif /* INCLUDED_volk_16i_s32f_convert_32f_a16_H */
+#endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */
diff --git a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
index 94e5eb986..2688aff04 100644
--- a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h
+++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H
-#define INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H
+#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a_H
+#define INCLUDED_volk_16i_x4_quad_max_star_16i_a_H
 
 
 #include<inttypes.h>
@@ -13,7 +13,7 @@
 
 #include<emmintrin.h>
 
-static inline  void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
+static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
 	
 
 
@@ -96,9 +96,9 @@ static inline  void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short*
 
 	/*asm volatile
 		(
-		 "volk_16i_x4_quad_max_star_16i_a16_sse2_L1:\n\t"
+		 "volk_16i_x4_quad_max_star_16i_a_sse2_L1:\n\t"
 		 "cmp $0, %[bound]\n\t"
-		 "je volk_16i_x4_quad_max_star_16i_a16_sse2_END\n\t"
+		 "je volk_16i_x4_quad_max_star_16i_a_sse2_END\n\t"
 
 		 "movaps (%[src0]), %%xmm1\n\t"
 		 "movaps (%[src1]), %%xmm2\n\t"
@@ -143,9 +143,9 @@ static inline  void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short*
 
 		 "movaps %%xmm1, (%[target])\n\t"
 		 "addw $16, %[target]\n\t"
-		 "jmp volk_16i_x4_quad_max_star_16i_a16_sse2_L1\n\t"
+		 "jmp volk_16i_x4_quad_max_star_16i_a_sse2_L1\n\t"
 		 
-		 "volk_16i_x4_quad_max_star_16i_a16_sse2_END:\n\t"
+		 "volk_16i_x4_quad_max_star_16i_a_sse2_END:\n\t"
 		 :
 		 :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target)
 		 :
@@ -168,7 +168,7 @@ static inline  void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short*
 
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
+static inline void volk_16i_x4_quad_max_star_16i_a_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -188,4 +188,4 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, shor
 
 #endif /*LV_HAVE_GENERIC*/
 
-#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H*/
+#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a_H*/
diff --git a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
index c157bf64a..e4c9f17ed 100644
--- a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h
+++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H
-#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H
+#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H
+#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H
 
 
 #include<inttypes.h>
@@ -13,7 +13,7 @@
 #include<xmmintrin.h>
 #include<emmintrin.h>
 
-static inline  void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
+static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
   
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
   __m128i *p_target0, *p_target1, *p_target2, *p_target3,  *p_src0, *p_src1, *p_src2, *p_src3, *p_src4;
@@ -65,9 +65,9 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short*
   }
     /*asm volatile
 		(
-		 ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1:\n\t"
+		 ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1:\n\t"
 		 "cmp $0, %[bound]\n\t"
-		 "je .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END\n\t"
+		 "je .%=volk_16i_x5_add_quad_16i_x4_a_sse2_END\n\t"
 		 "movaps (%[src0]), %%xmm1\n\t"
 		 "movaps (%[src1]), %%xmm2\n\t"
 		 "movaps (%[src2]), %%xmm3\n\t"
@@ -91,8 +91,8 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short*
 		 "add $16, %[target1]\n\t"
 		 "add $16, %[target2]\n\t"
 		 "add $16, %[target3]\n\t"
-		 "jmp .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1\n\t"
-		 ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END:\n\t"
+		 "jmp .%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1\n\t"
+		 ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_END:\n\t"
 		 :
 		 :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3)
 		 :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -113,7 +113,7 @@ static inline  void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short*
 
 #ifdef LV_HAVE_GENERIC
 
-static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
+static inline void volk_16i_x5_add_quad_16i_x4_a_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -133,4 +133,4 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short
 
 
 
-#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H*/
+#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H*/
diff --git a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h
index 227a92303..cdd60235e 100644
--- a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h
+++ b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H
-#define INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H
+#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
+#define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -61,7 +61,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -128,7 +128,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int1
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_16i_x2_a_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
   int16_t* qBufferPtr = qBuffer;
@@ -148,11 +148,11 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, i
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-extern void volk_16ic_deinterleave_16i_x2_a16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
-static inline void volk_16ic_deinterleave_16i_x2_a16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
-    volk_16ic_deinterleave_16i_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, num_points);
+extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16ic_deinterleave_16i_x2_a_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+    volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H */
+#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a_H */
diff --git a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h
index 35d0e8be2..2b99e068e 100644
--- a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h
+++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a16_H
-#define INCLUDED_volk_16ic_deinterleave_real_16i_a16_H
+#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
+#define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -55,7 +55,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, c
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -103,7 +103,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, co
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -117,4 +117,4 @@ static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer,
 
 
 
-#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a16_H */
+#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a_H */
diff --git a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a.h
index bdf5fc162..cd2fabb52 100644
--- a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h
+++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a16_H
-#define INCLUDED_volk_16ic_deinterleave_real_8i_a16_H
+#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H
+#define INCLUDED_volk_16ic_deinterleave_real_8i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
@@ -66,7 +66,7 @@ static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, con
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_8i_a_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   int16_t* complexVectorPtr = (int16_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
@@ -84,11 +84,11 @@ static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, c
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-extern void volk_16ic_deinterleave_real_8i_a16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
-static inline void volk_16ic_deinterleave_real_8i_a16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
-    volk_16ic_deinterleave_real_8i_a16_orc_impl(iBuffer, complexVector, num_points);
+extern void volk_16ic_deinterleave_real_8i_a_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16ic_deinterleave_real_8i_a_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+    volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a16_H */
+#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a_H */
diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a16.h b/volk/include/volk/volk_16ic_magnitude_16i_a.h
index 73c6f3390..a6951e967 100644
--- a/volk/include/volk/volk_16ic_magnitude_16i_a16.h
+++ b/volk/include/volk/volk_16ic_magnitude_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H
-#define INCLUDED_volk_16ic_magnitude_16i_a16_H
+#ifndef INCLUDED_volk_16ic_magnitude_16i_a_H
+#define INCLUDED_volk_16ic_magnitude_16i_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
    
@@ -85,7 +85,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -161,7 +161,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_magnitude_16i_a_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   int16_t* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -181,11 +181,11 @@ static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector,
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
-static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
-    volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
+extern void volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
+static inline void volk_16ic_magnitude_16i_a_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+    volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */
+#endif /* INCLUDED_volk_16ic_magnitude_16i_a_H */
diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h
index e4a9015b4..e73d405e0 100644
--- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H
-#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H
+#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
+#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
     \param scalar The data value to be divided against each input data value of the input complex vector
     \param num_points The number of complex data values to be deinterleaved
   */
-static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
     float* iBufferPtr = iBuffer;
     float* qBufferPtr = qBuffer;
 
@@ -78,7 +78,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl
     \param scalar The data value to be divided against each input data value of the input complex vector
     \param num_points The number of complex data values to be deinterleaved
   */
-static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -99,11 +99,11 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer
     \param scalar The data value to be divided against each input data value of the input complex vector
     \param num_points The number of complex data values to be deinterleaved
   */
-extern void volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
-static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
-    volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
+extern void volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16ic_s32f_deinterleave_32f_x2_a_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+    volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H */
+#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H */
diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h
index 993445995..1630cb0ed 100644
--- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
-#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
+#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
+#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -62,7 +62,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffe
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -108,7 +108,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer,
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* iBufferPtr = iBuffer;
@@ -123,4 +123,4 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuff
 
 
 
-#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */
+#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h
index a136c0535..35406e2cb 100644
--- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h
+++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H
-#define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H
+#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
+#define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -80,7 +80,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector,
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -149,7 +149,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector,
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_magnitude_32f_a_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -170,11 +170,11 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVect
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-extern void volk_16ic_s32f_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
-static inline void volk_16ic_s32f_magnitude_32f_a16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
-    volk_16ic_s32f_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points);
+extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16ic_s32f_magnitude_32f_a_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+    volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H */
+#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
diff --git a/volk/include/volk/volk_16u_byteswap_a16.h b/volk/include/volk/volk_16u_byteswap_a.h
index f393c05c5..75c7ef0f3 100644
--- a/volk/include/volk/volk_16u_byteswap_a16.h
+++ b/volk/include/volk/volk_16u_byteswap_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_16u_byteswap_a16_H
-#define INCLUDED_volk_16u_byteswap_a16_H
+#ifndef INCLUDED_volk_16u_byteswap_a_H
+#define INCLUDED_volk_16u_byteswap_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int num_points){
+static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points){
   unsigned int number = 0;
   uint16_t* inputPtr = intsToSwap;
   __m128i input, left, right, output;
@@ -49,7 +49,7 @@ static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned int num_points){
+static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points){
   unsigned int point;
   uint16_t* inputPtr = intsToSwap;
   for(point = 0; point < num_points; point++){
@@ -67,11 +67,11 @@ static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-extern void volk_16u_byteswap_a16_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
-static inline void volk_16u_byteswap_a16_orc(uint16_t* intsToSwap, unsigned int num_points){
-    volk_16u_byteswap_a16_orc_impl(intsToSwap, num_points);
+extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
+static inline void volk_16u_byteswap_a_orc(uint16_t* intsToSwap, unsigned int num_points){
+    volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_16u_byteswap_a16_H */
+#endif /* INCLUDED_volk_16u_byteswap_a_H */
diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a16.h b/volk/include/volk/volk_32f_accumulator_s32f_a.h
index dd24a1e29..7ce0d1c80 100644
--- a/volk/include/volk/volk_32f_accumulator_s32f_a16.h
+++ b/volk/include/volk/volk_32f_accumulator_s32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H
-#define INCLUDED_volk_32f_accumulator_s32f_a16_H
+#ifndef INCLUDED_volk_32f_accumulator_s32f_a_H
+#define INCLUDED_volk_32f_accumulator_s32f_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -13,7 +13,7 @@
   \param inputBuffer The buffer of data to be accumulated
   \param num_points The number of values in inputBuffer to be accumulated
 */
-static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
@@ -50,7 +50,7 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float*
   \param inputBuffer The buffer of data to be accumulated
   \param num_points The number of values in inputBuffer to be accumulated
 */
-static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_accumulator_s32f_a_generic(float* result, const float* inputBuffer, unsigned int num_points){
   const float* aPtr = inputBuffer;
   unsigned int number = 0;
   float returnValue = 0;
@@ -65,4 +65,4 @@ static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const fl
 
 
 
-#endif /* INCLUDED_volk_32f_accumulator_s32f_a16_H */
+#endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */
diff --git a/volk/include/volk/volk_32f_convert_64f_a16.h b/volk/include/volk/volk_32f_convert_64f_a.h
index 8ca83220b..dda646409 100644
--- a/volk/include/volk/volk_32f_convert_64f_a16.h
+++ b/volk/include/volk/volk_32f_convert_64f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_convert_64f_a16_H
-#define INCLUDED_volk_32f_convert_64f_a16_H
+#ifndef INCLUDED_volk_32f_convert_64f_a_H
+#define INCLUDED_volk_32f_convert_64f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
     \param fVector The float vector values to be converted
     \param num_points The number of points in the two vectors to be converted
   */
-static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
+static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const flo
   \param fVector The float vector values to be converted
   \param num_points The number of points in the two vectors to be converted
 */
-static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const float* inputVector, unsigned int num_points){
+static inline void volk_32f_convert_64f_a_generic(double* outputVector, const float* inputVector, unsigned int num_points){
   double* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const
 
 
 
-#endif /* INCLUDED_volk_32f_convert_64f_a16_H */
+#endif /* INCLUDED_volk_32f_convert_64f_a_H */
diff --git a/volk/include/volk/volk_32f_index_max_16u_a16.h b/volk/include/volk/volk_32f_index_max_16u_a.h
index af1f35348..3e0cf1d65 100644
--- a/volk/include/volk/volk_32f_index_max_16u_a16.h
+++ b/volk/include/volk/volk_32f_index_max_16u_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_index_max_16u_a16_H
-#define INCLUDED_volk_32f_index_max_16u_a16_H
+#ifndef INCLUDED_volk_32f_index_max_16u_a_H
+#define INCLUDED_volk_32f_index_max_16u_a_H
 
 #include <volk/volk_common.h>
 #include <volk/volk_common.h>
@@ -9,7 +9,7 @@
 #ifdef LV_HAVE_SSE4_1
 #include<smmintrin.h>
 
-static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
+static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
   if(num_points > 0){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
@@ -67,7 +67,7 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const
 #ifdef LV_HAVE_SSE
 #include<xmmintrin.h>
 
-static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) {
+static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
   if(num_points > 0){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
@@ -124,7 +124,7 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl
 #endif /*LV_HAVE_SSE*/
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) {
+static inline void volk_32f_index_max_16u_a_generic(unsigned int* target, const float* src0, unsigned int num_points) {
   if(num_points > 0){
     float max = src0[0];
     unsigned int index = 0;
@@ -146,4 +146,4 @@ static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, cons
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/
+#endif /*INCLUDED_volk_32f_index_max_16u_a_H*/
diff --git a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h
index 6efd21a37..b25df75a1 100644
--- a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h
+++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H
-#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H
+#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
+#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample.
   \param num_noints The number of real values in the input vector.
 */
-static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
+static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
   if (num_points < 1) {
     return;
   }
@@ -87,7 +87,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector,
   \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample.
   \param num_points The number of real values in the input vector.
 */
-static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
+static inline void volk_32f_s32f_32f_fm_detect_32f_a_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
   if (num_points < 1) {
     return;
   }
@@ -117,4 +117,4 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVect
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h
index f5b388e6d..b1902a8c0 100644
--- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h
+++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H
-#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H
+#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
+#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -17,7 +17,7 @@
   \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20
   \param noiseFloorAmplitude The noise floor of the input spectrum, in dB
 */
-static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
+static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -128,7 +128,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no
   \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20
   \param noiseFloorAmplitude The noise floor of the input spectrum, in dB
 */
-static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
+static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
   float sumMean = 0.0;
   unsigned int number;
   // find the sum (for mean), etc
@@ -165,4 +165,4 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
index 4acd2e13e..0a2b4f0f2 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H
-#define INCLUDED_volk_32f_s32f_convert_16i_a16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
+#define INCLUDED_volk_32f_s32f_convert_16i_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
@@ -54,7 +54,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, con
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -94,7 +94,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, cons
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int16_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -108,4 +108,4 @@ static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector,
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_convert_16i_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
index 3f5044313..aa370e614 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H
-#define INCLUDED_volk_32f_s32f_convert_32i_a16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
+#define INCLUDED_volk_32f_s32f_convert_32i_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_32i_a16_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
@@ -50,7 +50,7 @@ static inline void volk_32f_s32f_convert_32i_a16_avx(int32_t* outputVector, cons
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -86,7 +86,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, con
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -126,7 +126,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, cons
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int32_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -140,4 +140,4 @@ static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector,
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_convert_32i_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
index c114ea38f..8d87a07d7 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H
-#define INCLUDED_volk_32f_s32f_convert_8i_a16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
+#define INCLUDED_volk_32f_s32f_convert_8i_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int sixteenthPoints = num_points / 16;
@@ -61,7 +61,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -101,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int8_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -115,4 +115,4 @@ static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, co
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_convert_8i_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_normalize_a16.h b/volk/include/volk/volk_32f_s32f_normalize_a.h
index e6195cd32..f5fd0d1db 100644
--- a/volk/include/volk/volk_32f_s32f_normalize_a16.h
+++ b/volk/include/volk/volk_32f_s32f_normalize_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_normalize_a16_H
-#define INCLUDED_volk_32f_s32f_normalize_a16_H
+#ifndef INCLUDED_volk_32f_s32f_normalize_a_H
+#define INCLUDED_volk_32f_s32f_normalize_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param num_points The number of values in vecBuffer
   \param scalar The scale value to be applied to each buffer value
 */
-static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   float* inputPtr = vecBuffer;
 
@@ -49,7 +49,7 @@ static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float
   \param bVector One of the vectors to be normalizeed
   \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector
 */
-static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_normalize_a_generic(float* vecBuffer, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   float* inputPtr = vecBuffer;
   const float invScalar = 1.0 / scalar;
@@ -68,14 +68,14 @@ static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const f
   \param bVector One of the vectors to be normalizeed
   \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector
 */
-extern void volk_32f_s32f_normalize_a16_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points);
-static inline void volk_32f_s32f_normalize_a16_orc(float* vecBuffer, const float scalar, unsigned int num_points){
+extern void volk_32f_s32f_normalize_a_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points);
+static inline void volk_32f_s32f_normalize_a_orc(float* vecBuffer, const float scalar, unsigned int num_points){
     float invscalar = 1.0 / scalar;
-    volk_32f_s32f_normalize_a16_orc_impl(vecBuffer, vecBuffer, invscalar, num_points);
+    volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points);
 }
 #endif /* LV_HAVE_GENERIC */
 
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_normalize_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_normalize_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a16.h b/volk/include/volk/volk_32f_s32f_power_32f_a.h
index ecff901e1..c4fa31bd1 100644
--- a/volk/include/volk/volk_32f_s32f_power_32f_a16.h
+++ b/volk/include/volk/volk_32f_s32f_power_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H
-#define INCLUDED_volk_32f_s32f_power_32f_a16_H
+#ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
+#define INCLUDED_volk_32f_s32f_power_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -19,7 +19,7 @@
   \param power The power value to be applied to each data point
   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
 */
-static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
+static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -76,7 +76,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa
   \param power The power value to be applied to each data point
   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
 */
-static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
+static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -127,7 +127,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float*
     \param power The power value to be applied to each data point
     \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
   */
-static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
+static inline void volk_32f_s32f_power_32f_a_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
   float* cPtr = cVector;
   const float* aPtr = aVector;
   unsigned int number = 0;
@@ -141,4 +141,4 @@ static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const flo
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */
diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h
index c2b903657..881067bdc 100644
--- a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
+++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H
-#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H
+#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H
+#define INCLUDED_volk_32f_s32f_stddev_32f_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
   \param mean The mean of the input buffer
   \param num_points The number of values in input buffer to used in the stddev calculation
 */
-static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
   float returnValue = 0;
   if(num_points > 0){
     unsigned int number = 0;
@@ -75,7 +75,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa
   \param mean The mean of the input buffer
   \param num_points The number of values in input buffer to used in the stddev calculation
 */
-static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
   float returnValue = 0;
   if(num_points > 0){
     unsigned int number = 0;
@@ -120,7 +120,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float*
   \param mean The mean of the input buffer
   \param num_points The number of values in input buffer to used in the stddev calculation
 */
-static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+static inline void volk_32f_s32f_stddev_32f_a_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
   float returnValue = 0;
   if(num_points > 0){
     const float* aPtr = inputBuffer;
@@ -142,4 +142,4 @@ static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const flo
 
 
 
-#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */
+#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */
diff --git a/volk/include/volk/volk_32f_sqrt_32f_a16.h b/volk/include/volk/volk_32f_sqrt_32f_a.h
index a9ce76f88..e44c73cfd 100644
--- a/volk/include/volk/volk_32f_sqrt_32f_a16.h
+++ b/volk/include/volk/volk_32f_sqrt_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_sqrt_32f_a16_H
-#define INCLUDED_volk_32f_sqrt_32f_a16_H
+#ifndef INCLUDED_volk_32f_sqrt_32f_a_H
+#define INCLUDED_volk_32f_sqrt_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param aVector One of the vectors to be sqrted
   \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
 */
-static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVector, unsigned int num_points){
+static inline void volk_32f_sqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -47,7 +47,7 @@ static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVecto
   \param aVector One of the vectors to be sqrted
   \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
 */
-static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aVector, unsigned int num_points){
+static inline void volk_32f_sqrt_32f_a_generic(float* cVector, const float* aVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     unsigned int number = 0;
@@ -59,19 +59,19 @@ static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aV
 #endif /* LV_HAVE_GENERIC */
 
 #ifdef LV_HAVE_ORC
-extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int);
+extern void volk_32f_sqrt_32f_a_orc_impl(float *, const float*, unsigned int);
 /*!
   \brief Sqrts the two input vectors and store their results in the third vector
   \param cVector The vector where the results will be stored
   \param aVector One of the vectors to be sqrted
   \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
 */
-static inline void volk_32f_sqrt_32f_a16_orc(float* cVector, const float* aVector, unsigned int num_points){
-    volk_32f_sqrt_32f_a16_orc_impl(cVector, aVector, num_points);
+static inline void volk_32f_sqrt_32f_a_orc(float* cVector, const float* aVector, unsigned int num_points){
+    volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
 }
 
 #endif /* LV_HAVE_ORC */
 
 
 
-#endif /* INCLUDED_volk_32f_sqrt_32f_a16_H */
+#endif /* INCLUDED_volk_32f_sqrt_32f_a_H */
diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h
index 10d72e09d..3a82e3d2f 100644
--- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h
+++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
-#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
+#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
+#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
   \param inputBuffer The buffer of points to calculate the std deviation for
   \param num_points The number of values in input buffer to used in the stddev and mean calculations
 */
-static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   float newMean = 0;
   if(num_points > 0){
@@ -88,7 +88,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo
   \param inputBuffer The buffer of points to calculate the std deviation for
   \param num_points The number of values in input buffer to used in the stddev and mean calculations
 */
-static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   float newMean = 0;
   if(num_points > 0){
@@ -143,7 +143,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float*
   \param inputBuffer The buffer of points to calculate the std deviation for
   \param num_points The number of values in input buffer to used in the stddev and mean calculations
 */
-static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_stddev_and_mean_32f_x2_a_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   float newMean = 0;
   if(num_points > 0){
@@ -167,4 +167,4 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, fl
 
 
 
-#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */
+#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */
diff --git a/volk/include/volk/volk_32f_x2_add_32f_a16.h b/volk/include/volk/volk_32f_x2_add_32f_a.h
index 2de6a6644..3bc83653b 100644
--- a/volk/include/volk/volk_32f_x2_add_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_add_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_add_32f_a16_H
-#define INCLUDED_volk_32f_x2_add_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_add_32f_a_H
+#define INCLUDED_volk_32f_x2_add_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors to be added
   \param num_points The number of values in aVector and bVector to be added together and stored into cVector
 */
-static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVec
   \param bVector One of the vectors to be added
   \param num_points The number of values in aVector and bVector to be added together and stored into cVector
 */
-static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -71,11 +71,11 @@ static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float*
   \param bVector One of the vectors to be added
   \param num_points The number of values in aVector and bVector to be added together and stored into cVector
 */
-extern void volk_32f_x2_add_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_add_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_add_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_add_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32f_x2_add_32f_a16_H */
+#endif /* INCLUDED_volk_32f_x2_add_32f_a_H */
diff --git a/volk/include/volk/volk_32f_x2_divide_32f_a16.h b/volk/include/volk/volk_32f_x2_divide_32f_a.h
index 1603e78de..52ddfae87 100644
--- a/volk/include/volk/volk_32f_x2_divide_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_divide_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_divide_32f_a16_H
-#define INCLUDED_volk_32f_x2_divide_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_divide_32f_a_H
+#define INCLUDED_volk_32f_x2_divide_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The divisor vector
   \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector
 */
-static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_divide_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* a
   \param bVector The divisor vector
   \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector
 */
-static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_divide_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -71,12 +71,12 @@ static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const floa
   \param bVector The divisor vector
   \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector
 */
-extern void volk_32f_x2_divide_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_divide_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_divide_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32f_x2_divide_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_divide_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_divide_32f_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
 
-#endif /* INCLUDED_volk_32f_x2_divide_32f_a16_H */
+#endif /* INCLUDED_volk_32f_x2_divide_32f_a_H */
diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h
index 2cd974070..0c58f2ecf 100644
--- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H
-#define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a_H
+#define INCLUDED_volk_32f_x2_dot_prod_32f_a_H
 
 #include <volk/volk_common.h>
 #include<stdio.h>
@@ -8,7 +8,7 @@
 #ifdef LV_HAVE_GENERIC
 
 
-static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a_generic(float * result, const float * input, const float * taps, unsigned int num_points) {
 
   float dotProduct = 0;
   const float* aPtr = input;
@@ -28,7 +28,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const fl
 #ifdef LV_HAVE_SSE
 
 
-static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
   
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
@@ -78,7 +78,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const  float
 
 #include <pmmintrin.h>
 
-static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a_sse3(float * result, const float * input, const float * taps, unsigned int num_points) {
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -125,7 +125,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float
 
 #include <smmintrin.h>
 
-static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) {
   unsigned int number = 0;
   const unsigned int sixteenthPoints = num_points / 16;
 
@@ -182,4 +182,4 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo
 
 #endif /*LV_HAVE_SSE4_1*/
 
-#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a16_H*/
+#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a_H*/
diff --git a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h
index f3731fa2a..1d4d2dbbd 100644
--- a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h
+++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a16_H
-#define INCLUDED_volk_32f_x2_interleave_32fc_a16_H
+#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
+#define INCLUDED_volk_32f_x2_interleave_32fc_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param complexVector The complex output vector
   \param num_points The number of complex data values to be interleaved
 */
-static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
+static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
   unsigned int number = 0;
   float* complexVectorPtr = (float*)complexVector;
   const float* iBufferPtr = iBuffer;
@@ -56,7 +56,7 @@ static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector,
   \param complexVector The complex output vector
   \param num_points The number of complex data values to be interleaved
 */
-static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
+static inline void volk_32f_x2_interleave_32fc_a_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
   float* complexVectorPtr = (float*)complexVector;
   const float* iBufferPtr = iBuffer;
   const float* qBufferPtr = qBuffer;
@@ -72,4 +72,4 @@ static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVec
 
 
 
-#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a16_H */
+#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
diff --git a/volk/include/volk/volk_32f_x2_max_32f_a16.h b/volk/include/volk/volk_32f_x2_max_32f_a.h
index 60be6e36d..7948c458d 100644
--- a/volk/include/volk/volk_32f_x2_max_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_max_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H
-#define INCLUDED_volk_32f_x2_max_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_max_32f_a_H
+#define INCLUDED_volk_32f_x2_max_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_max_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -53,7 +53,7 @@ static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVec
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_max_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -75,11 +75,11 @@ static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float*
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32f_x2_max_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_max_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_max_32f_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */
+#endif /* INCLUDED_volk_32f_x2_max_32f_a_H */
diff --git a/volk/include/volk/volk_32f_x2_min_32f_a16.h b/volk/include/volk/volk_32f_x2_min_32f_a.h
index 3b8291531..d77134868 100644
--- a/volk/include/volk/volk_32f_x2_min_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_min_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_min_32f_a16_H
-#define INCLUDED_volk_32f_x2_min_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_min_32f_a_H
+#define INCLUDED_volk_32f_x2_min_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_min_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -53,7 +53,7 @@ static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVec
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_min_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -75,11 +75,11 @@ static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float*
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-extern void volk_32f_x2_min_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_min_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_min_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_min_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32f_x2_min_32f_a16_H */
+#endif /* INCLUDED_volk_32f_x2_min_32f_a_H */
diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a.h
index 885941abf..fae9a652f 100644
--- a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_multiply_32f_a16_H
-#define INCLUDED_volk_32f_x2_multiply_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
+#define INCLUDED_volk_32f_x2_multiply_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors to be multiplied
   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -52,7 +52,7 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float*
   \param bVector One of the vectors to be multiplied
   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_32f_x2_multiply_32f_a16_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int eighthPoints = num_points / 8;
 
@@ -90,7 +90,7 @@ static inline void volk_32f_x2_multiply_32f_a16_avx(float* cVector, const float*
   \param bVector One of the vectors to be multiplied
   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -110,11 +110,11 @@ static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const fl
   \param bVector One of the vectors to be multiplied
   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
 */
-extern void volk_32f_x2_multiply_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_multiply_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_multiply_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_multiply_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32f_x2_multiply_32f_a16_H */
+#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */
diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h
index f7ad3fd18..cc02c3678 100644
--- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h
+++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
-#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
+#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
+#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
     \param scalar The scaling value being multiplied against each data point
     \param num_points The number of complex data values to be interleaved
   */
-static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const float* iBufferPtr = iBuffer;
     const float* qBufferPtr = qBuffer;
@@ -73,7 +73,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexV
     \param scalar The scaling value being multiplied against each data point
     \param num_points The number of complex data values to be interleaved
   */
-static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const float* iBufferPtr = iBuffer;
     const float* qBufferPtr = qBuffer;
@@ -137,7 +137,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe
     \param scalar The scaling value being multiplied against each data point
     \param num_points The number of complex data values to be interleaved
   */
-static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_x2_s32f_interleave_16ic_a_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
   int16_t* complexVectorPtr = (int16_t*)complexVector;
   const float* iBufferPtr = iBuffer;
   const float* qBufferPtr = qBuffer;
@@ -153,4 +153,4 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* compl
 
 
 
-#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */
+#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */
diff --git a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a.h
index c01f2c1f3..16cad008a 100644
--- a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h
+++ b/volk/include/volk/volk_32f_x2_subtract_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x2_subtract_32f_a16_H
-#define INCLUDED_volk_32f_x2_subtract_32f_a16_H
+#ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H
+#define INCLUDED_volk_32f_x2_subtract_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be subtracted
   \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
 */
-static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float*
   \param bVector The vector to be subtracted
   \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
 */
-static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_subtract_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -71,11 +71,11 @@ static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const fl
   \param bVector The vector to be subtracted
   \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
 */
-extern void volk_32f_x2_subtract_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_subtract_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_subtract_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_subtract_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32f_x2_subtract_32f_a16_H */
+#endif /* INCLUDED_volk_32f_x2_subtract_32f_a_H */
diff --git a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
index 6e446cbef..2ea8fa96d 100644
--- a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h
+++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H
-#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H
+#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H
+#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H
 
 #include<inttypes.h>
 #include<stdio.h>
@@ -13,7 +13,7 @@
 #include<xmmintrin.h>
 #include<pmmintrin.h>
 
-static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
+static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
   
   
   float result = 0.0;
@@ -100,7 +100,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* sr
 
 #ifdef LV_HAVE_GENERIC
 
-static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
+static inline void volk_32f_x3_sum_of_poly_32f_a_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
 
 
     
@@ -148,4 +148,4 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float*
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H*/
+#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H*/
diff --git a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h
index 846315a4a..b7350b9fa 100644
--- a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a16_H
-#define INCLUDED_volk_32fc_32f_multiply_32fc_a16_H
+#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H
+#define INCLUDED_volk_32fc_32f_multiply_32fc_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param bVector The vectors containing the float values to be multiplied against each complex value in aVector
     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -64,7 +64,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const
     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32fc_32f_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
   lv_32fc_t* cPtr = cVector;
   const lv_32fc_t* aPtr = aVector;
   const float* bPtr=  bVector;
@@ -84,12 +84,12 @@ static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, c
     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
   */
-extern void volk_32fc_32f_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32fc_32f_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
-    volk_32fc_32f_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32fc_32f_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+    volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_GENERIC */
 
 
 
-#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a16_H */
+#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h
index 3e7c3fa28..9de036ef4 100644
--- a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H
-#define INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
+#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -57,7 +57,7 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float*
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -72,4 +72,4 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, flo
 
 
 
-#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h
index 945a26742..29c369d9a 100644
--- a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H
-#define INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
+#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
 
     const float* complexVectorPtr = (float*)complexVector;
@@ -59,7 +59,7 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, doubl
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const float* complexVectorPtr = (float*)complexVector;
   double* iBufferPtr = iBuffer;
@@ -75,4 +75,4 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, do
 
 
 
-#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h
index 3c3fb2583..a1d0fd5d1 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a16_H
-#define INCLUDED_volk_32fc_deinterleave_real_32f_a16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
+#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_32f_a_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const float* complexVectorPtr = (float*)complexVector;
   float* iBufferPtr = iBuffer;
@@ -65,4 +65,4 @@ static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, c
 
 
 
-#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h
index 40c1a7a46..70a3b1971 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a16_H
-#define INCLUDED_volk_32fc_deinterleave_real_64f_a16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
+#define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
 
   const float* complexVectorPtr = (float*)complexVector;
@@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, con
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_64f_a_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const float* complexVectorPtr = (float*)complexVector;
   double* iBufferPtr = iBuffer;
@@ -63,4 +63,4 @@ static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer,
 
 
 
-#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
diff --git a/volk/include/volk/volk_32fc_index_max_16u_a16.h b/volk/include/volk/volk_32fc_index_max_16u_a.h
index 0ad1edbe9..312e034e2 100644
--- a/volk/include/volk/volk_32fc_index_max_16u_a16.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_index_max_16u_a16_H
-#define INCLUDED_volk_32fc_index_max_16u_a16_H
+#ifndef INCLUDED_volk_32fc_index_max_16u_a_H
+#define INCLUDED_volk_32fc_index_max_16u_a_H
 
 #include <volk/volk_common.h>
 #include<inttypes.h>
@@ -11,7 +11,7 @@
 #include<pmmintrin.h>
 
 
-static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
+static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
   
   
   
@@ -189,7 +189,7 @@ static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32f
 #endif /*LV_HAVE_SSE3*/
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
+static inline void volk_32fc_index_max_16u_a_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
   float sq_dist = 0.0;
   float max = 0.0;
   unsigned int index = 0;
@@ -212,4 +212,4 @@ static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_32fc_index_max_16u_a16_H*/
+#endif /*INCLUDED_volk_32fc_index_max_16u_a_H*/
diff --git a/volk/include/volk/volk_32fc_magnitude_32f_a16.h b/volk/include/volk/volk_32fc_magnitude_32f_a.h
index 946190e41..f18e9bc0b 100644
--- a/volk/include/volk/volk_32fc_magnitude_32f_a16.h
+++ b/volk/include/volk/volk_32fc_magnitude_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_magnitude_32f_a16_H
-#define INCLUDED_volk_32fc_magnitude_32f_a16_H
+#ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
+#define INCLUDED_volk_32fc_magnitude_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -56,7 +56,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, cons
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -103,7 +103,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -122,11 +122,11 @@ static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, c
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-extern void volk_32fc_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points);
-static inline void volk_32fc_magnitude_32f_a16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    volk_32fc_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, num_points);
+extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points);
+static inline void volk_32fc_magnitude_32f_a_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32fc_magnitude_32f_a16_H */
+#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h
index 55b1b6c70..9304b0c28 100644
--- a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a16_H
-#define INCLUDED_volk_32fc_s32f_atan2_32f_a16_H
+#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
+#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -19,7 +19,7 @@
   \param normalizeFactor The atan2 results will be divided by this normalization factor.
   \param num_points The number of complex values in the input vector.
 */
-static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* outPtr = outputVector;
 
@@ -81,7 +81,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector,  con
   \param normalizeFactor The atan2 results will be divided by this normalization factor.
   \param num_points The number of complex values in the input vector.
 */
-static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* outPtr = outputVector;
 
@@ -139,7 +139,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector,  const
   \param normalizeFactor The atan2 results will be divided by this normalization factor.
   \param num_points The number of complex values in the input vector.
 */
-static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_atan2_32f_a_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
   float* outPtr = outputVector;
   const float* inPtr = (float*)inputVector;
   const float invNormalizeFactor = 1.0 / normalizeFactor;
@@ -155,4 +155,4 @@ static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, con
 
 
 
-#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a16_H */
+#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h
index 2460039d2..1c17fb70c 100644
--- a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H
-#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H
+#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
+#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -63,7 +63,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   int16_t* iBufferPtr = iBuffer;
   unsigned int number = 0;
@@ -78,4 +78,4 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBu
 
 
 
-#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H */
+#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
diff --git a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h
index f67ab0607..38fd609d3 100644
--- a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H
-#define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H
+#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
+#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -70,7 +70,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -129,7 +129,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_magnitude_16i_a_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   int16_t* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -149,11 +149,11 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVe
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-extern void volk_32fc_s32f_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points);
-static inline void volk_32fc_s32f_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
-    volk_32fc_s32f_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points);
+extern void volk_32fc_s32f_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_32fc_s32f_magnitude_16i_a_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+    volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H */
+#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
diff --git a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h
index 155b93ca2..ec1d7167f 100644
--- a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h
@@ -1,12 +1,12 @@
-#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a16_H
-#define INCLUDED_volk_32fc_s32f_power_32fc_a16_H
+#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a_H
+#define INCLUDED_volk_32fc_s32f_power_32fc_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
 #include <math.h>
 
 //! raise a complex float to a real float power
-static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a16(const lv_32fc_t exp, const float power){
+static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, const float power){
     const float arg = power*atan2f(lv_creal(exp), lv_cimag(exp));
     const float mag = powf(lv_creal(exp)*lv_creal(exp) + lv_cimag(exp)*lv_cimag(exp), power/2);
     return mag*lv_cmake(cosf(arg), sinf(arg));
@@ -26,7 +26,7 @@ static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a16(const lv_32fc_t exp, c
   \param power The power value to be applied to each data point
   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
 */
-static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
+static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -81,7 +81,7 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l
 #endif /* LV_HAVE_LIB_SIMDMATH */
 
   for(;number < num_points; number++){
-    *cPtr++ = __volk_s32fc_s32f_power_s32fc_a16((*aPtr++), power);
+    *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
   }
 }
 #endif /* LV_HAVE_SSE */
@@ -94,13 +94,13 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l
     \param power The power value to be applied to each data point
     \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
   */
-static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
+static inline void volk_32fc_s32f_power_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
   lv_32fc_t* cPtr = cVector;
   const lv_32fc_t* aPtr = aVector;
   unsigned int number = 0;
 
   for(number = 0; number < num_points; number++){
-    *cPtr++ = __volk_s32fc_s32f_power_s32fc_a16((*aPtr++), power);
+    *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
   }
 }
 #endif /* LV_HAVE_GENERIC */
@@ -108,4 +108,4 @@ static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, con
 
 
 
-#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a16_H */
+#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h
index 03da069c2..8d1959dae 100644
--- a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H
-#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H
+#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
+#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -19,7 +19,7 @@
   \param normalizationFactor This value is divided against all the input values before the power is calculated
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
   const float* inputPtr = (const float*)complexFFTInput;
   float* destPtr = logPowerOutput;
   uint64_t number = 0;
@@ -96,7 +96,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut
   \param normalizationFactor This value is divided agains all the input values before the power is calculated
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_power_spectrum_32f_a_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
   // Calculate the Power of the complex point
   const float* inputPtr = (float*)complexFFTInput;
   float* realFFTDataPointsPtr = logPowerOutput;
@@ -123,4 +123,4 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPower
 
 
 
-#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H */
+#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h
index 5bcd7f7c4..fc635f171 100644
--- a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h
+++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H
-#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H
+#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
+#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -20,7 +20,7 @@
   \param rbw The resolution bandwith of the fft spectrum
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
+static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
   const float* inputPtr = (const float*)complexFFTInput;
   float* destPtr = logPowerOutput;
   uint64_t number = 0;
@@ -103,7 +103,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float*
   \param rbw The resolution bandwith of the fft spectrum
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
+static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
   // Calculate the Power of the complex point
   const float* inputPtr = (float*)complexFFTInput;
   float* realFFTDataPointsPtr = logPowerOutput;
@@ -131,4 +131,4 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(floa
 
 
 
-#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H */
+#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
index f221237ff..a6c21336d 100644
--- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H
-#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H
+#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H
+#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H
 
 #include <volk/volk_common.h>
 #include<volk/volk_complex.h>
@@ -9,7 +9,7 @@
 #ifdef LV_HAVE_GENERIC
 
 
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   float * res = (float*) result;
   float * in = (float*) input;
@@ -63,7 +63,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* r
 #if LV_HAVE_SSE && LV_HAVE_64
 
 
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
   
@@ -204,7 +204,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* resul
 #endif
 
 #if LV_HAVE_SSE && LV_HAVE_32
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
 
@@ -342,4 +342,4 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* re
 
 
 
-#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H*/
+#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H*/
diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
index 9657c8f6b..022a0a614 100644
--- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H
-#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H
+#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H
+#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H
 
 #include <volk/volk_common.h>
 #include <volk/volk_complex.h>
@@ -10,7 +10,7 @@
 #ifdef LV_HAVE_GENERIC 
 
 
-static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   float * res = (float*) result;
   float * in = (float*) input;
@@ -60,7 +60,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, con
 #if LV_HAVE_SSE && LV_HAVE_64
 
 
-static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
 
   asm 
@@ -195,7 +195,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, cons
 
 #if LV_HAVE_SSE && LV_HAVE_32
 
-static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   asm volatile 
     (
@@ -321,7 +321,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, cons
 
 #include <pmmintrin.h>
 
-static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
 
   lv_32fc_t dotProduct;
@@ -378,8 +378,8 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const
 
 #include <smmintrin.h>
 
-static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  volk_32fc_x2_dot_prod_32fc_a16_sse3(result, input, taps, num_bytes);
+static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+  volk_32fc_x2_dot_prod_32fc_a_sse3(result, input, taps, num_bytes);
   // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now
    /* 
     __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
@@ -466,4 +466,4 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, cons
 
 #endif /*LV_HAVE_SSE4_1*/
 
-#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H*/
+#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H*/
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
index 72010b855..18dd092e8 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a16_H
-#define INCLUDED_volk_32fc_x2_multiply_32fc_a16_H
+#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H
+#define INCLUDED_volk_32fc_x2_multiply_32fc_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -15,7 +15,7 @@
     \param bVector One of the vectors to be multiplied
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
   unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
@@ -61,7 +61,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const
     \param bVector One of the vectors to be multiplied
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
     lv_32fc_t* cPtr = cVector;
     const lv_32fc_t* aPtr = aVector;
     const lv_32fc_t* bPtr=  bVector;
@@ -81,9 +81,9 @@ static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, co
     \param bVector One of the vectors to be multiplied
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
-extern void volk_32fc_x2_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
-static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-    volk_32fc_x2_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
+static inline void volk_32fc_x2_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+    volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
@@ -91,4 +91,4 @@ static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const
 
 
 
-#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a16_H */
+#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
index 910f51679..be7a4ffe9 100644
--- a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h
+++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H
-#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H
+#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H
+#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H
 
 #include<inttypes.h>
 #include<stdio.h>
@@ -10,7 +10,7 @@
 #include<xmmintrin.h>
 #include<pmmintrin.h>
 
-static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
+static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
   
 
   __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
@@ -106,7 +106,7 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float*
 #endif /*LV_HAVE_SSE3*/
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
+static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
   lv_32fc_t diff;
   float sq_dist;
   int i = 0; 
@@ -123,4 +123,4 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(flo
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H*/
+#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H*/
diff --git a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
index 551f3cb53..c21d00491 100644
--- a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h
+++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a16_H
-#define INCLUDED_volk_32fc_x2_square_dist_32f_a16_H
+#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a_H
+#define INCLUDED_volk_32fc_x2_square_dist_32f_a_H
 
 #include<inttypes.h>
 #include<stdio.h>
@@ -9,7 +9,7 @@
 #include<xmmintrin.h>
 #include<pmmintrin.h>
 
-static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
+static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
   
 
   __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
@@ -92,7 +92,7 @@ static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_
 #endif /*LV_HAVE_SSE3*/
 
 #ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
+static inline void volk_32fc_x2_square_dist_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
   lv_32fc_t diff;
   float sq_dist;
   int i = 0; 
@@ -109,4 +109,4 @@ static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a16_H*/
+#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a_H*/
diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a.h
index b744c7197..558142869 100644
--- a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h
+++ b/volk/include/volk/volk_32i_s32f_convert_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32i_s32f_convert_32f_a16_H
-#define INCLUDED_volk_32i_s32f_convert_32f_a16_H
+#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
+#define INCLUDED_volk_32i_s32f_convert_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -55,7 +55,7 @@ static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int32_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -70,4 +70,4 @@ static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, co
 
 
 
-#endif /* INCLUDED_volk_32i_s32f_convert_32f_a16_H */
+#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
diff --git a/volk/include/volk/volk_32i_x2_and_32i_a16.h b/volk/include/volk/volk_32i_x2_and_32i_a.h
index 4d50efd32..dcd63d98e 100644
--- a/volk/include/volk/volk_32i_x2_and_32i_a16.h
+++ b/volk/include/volk/volk_32i_x2_and_32i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32i_x2_and_32i_a16_H
-#define INCLUDED_volk_32i_x2_and_32i_a16_H
+#ifndef INCLUDED_volk_32i_x2_and_32i_a_H
+#define INCLUDED_volk_32i_x2_and_32i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors
   \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
 */
-static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t*
   \param bVector One of the vectors
   \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
 */
-static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_and_32i_a_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     int32_t* cPtr = cVector;
     const int32_t* aPtr = aVector;
     const int32_t* bPtr=  bVector;
@@ -71,11 +71,11 @@ static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32
   \param bVector One of the vectors
   \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
 */
-extern void volk_32i_x2_and_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
-static inline void volk_32i_x2_and_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
-    volk_32i_x2_and_32i_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
+static inline void volk_32i_x2_and_32i_a_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+    volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32i_x2_and_32i_a16_H */
+#endif /* INCLUDED_volk_32i_x2_and_32i_a_H */
diff --git a/volk/include/volk/volk_32i_x2_or_32i_a16.h b/volk/include/volk/volk_32i_x2_or_32i_a.h
index 9edbdbafd..243e8178c 100644
--- a/volk/include/volk/volk_32i_x2_or_32i_a16.h
+++ b/volk/include/volk/volk_32i_x2_or_32i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32i_x2_or_32i_a16_H
-#define INCLUDED_volk_32i_x2_or_32i_a16_H
+#ifndef INCLUDED_volk_32i_x2_or_32i_a_H
+#define INCLUDED_volk_32i_x2_or_32i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors to be ored
   \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
 */
-static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* a
   \param bVector One of the vectors to be ored
   \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
 */
-static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_or_32i_a_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     int32_t* cPtr = cVector;
     const int32_t* aPtr = aVector;
     const int32_t* bPtr=  bVector;
@@ -71,11 +71,11 @@ static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_
   \param bVector One of the vectors to be ored
   \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
 */
-extern void volk_32i_x2_or_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
-static inline void volk_32i_x2_or_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
-    volk_32i_x2_or_32i_a16_orc_impl(cVector, aVector, bVector, num_points);
+extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
+static inline void volk_32i_x2_or_32i_a_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+    volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, bVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
 
-#endif /* INCLUDED_volk_32i_x2_or_32i_a16_H */
+#endif /* INCLUDED_volk_32i_x2_or_32i_a_H */
diff --git a/volk/include/volk/volk_32u_byteswap_a16.h b/volk/include/volk/volk_32u_byteswap_a.h
index dc5cedab9..b88848096 100644
--- a/volk/include/volk/volk_32u_byteswap_a16.h
+++ b/volk/include/volk/volk_32u_byteswap_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_32u_byteswap_a16_H
-#define INCLUDED_volk_32u_byteswap_a16_H
+#ifndef INCLUDED_volk_32u_byteswap_a_H
+#define INCLUDED_volk_32u_byteswap_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int num_points){
+static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
   unsigned int number = 0;
 
   uint32_t* inputPtr = intsToSwap;
@@ -57,7 +57,7 @@ static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned int num_points){
+static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
   uint32_t* inputPtr = intsToSwap;
 
   unsigned int point;
@@ -74,4 +74,4 @@ static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned
 
 
 
-#endif /* INCLUDED_volk_32u_byteswap_a16_H */
+#endif /* INCLUDED_volk_32u_byteswap_a_H */
diff --git a/volk/include/volk/volk_32u_popcnt_a16.h b/volk/include/volk/volk_32u_popcnt_a.h
index 0d8b48fd5..b72d605c6 100644
--- a/volk/include/volk/volk_32u_popcnt_a16.h
+++ b/volk/include/volk/volk_32u_popcnt_a.h
@@ -7,7 +7,7 @@
 
 #ifdef LV_HAVE_GENERIC
 
-static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) {
+static inline void volk_32u_popcnt_a_generic(uint32_t* ret, const uint32_t value) {
 
   // This is faster than a lookup table
   uint32_t retVal = value;
@@ -27,7 +27,7 @@ static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t val
 
 #include <nmmintrin.h>
 
-static inline void volk_32u_popcnt_a16_sse4_2(uint32_t* ret, const uint32_t value) {
+static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t value) {
   *ret = _mm_popcnt_u32(value);
 }
 
diff --git a/volk/include/volk/volk_64f_convert_32f_a16.h b/volk/include/volk/volk_64f_convert_32f_a.h
index cfcdbdc3a..2126e4f95 100644
--- a/volk/include/volk/volk_64f_convert_32f_a16.h
+++ b/volk/include/volk/volk_64f_convert_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_64f_convert_32f_a16_H
-#define INCLUDED_volk_64f_convert_32f_a16_H
+#ifndef INCLUDED_volk_64f_convert_32f_a_H
+#define INCLUDED_volk_64f_convert_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
     \param fVector The double vector values to be converted
     \param num_points The number of points in the two vectors to be converted
   */
-static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
+static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const doub
   \param fVector The double vector values to be converted
   \param num_points The number of points in the two vectors to be converted
 */
-static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const double* inputVector, unsigned int num_points){
+static inline void volk_64f_convert_32f_a_generic(float* outputVector, const double* inputVector, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const double* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const d
 
 
 
-#endif /* INCLUDED_volk_64f_convert_32f_a16_H */
+#endif /* INCLUDED_volk_64f_convert_32f_a_H */
diff --git a/volk/include/volk/volk_64f_x2_max_64f_a16.h b/volk/include/volk/volk_64f_x2_max_64f_a.h
index 21f488bf7..61a704c52 100644
--- a/volk/include/volk/volk_64f_x2_max_64f_a16.h
+++ b/volk/include/volk/volk_64f_x2_max_64f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_64f_x2_max_64f_a16_H
-#define INCLUDED_volk_64f_x2_max_64f_a16_H
+#ifndef INCLUDED_volk_64f_x2_max_64f_a_H
+#define INCLUDED_volk_64f_x2_max_64f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_max_64f_a_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
@@ -53,7 +53,7 @@ static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* a
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_max_64f_a_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     double* cPtr = cVector;
     const double* aPtr = aVector;
     const double* bPtr=  bVector;
@@ -68,4 +68,4 @@ static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double
 #endif /* LV_HAVE_GENERIC */
 
 
-#endif /* INCLUDED_volk_64f_x2_max_64f_a16_H */
+#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
diff --git a/volk/include/volk/volk_64f_x2_min_64f_a16.h b/volk/include/volk/volk_64f_x2_min_64f_a.h
index 8711a0eae..148b72c59 100644
--- a/volk/include/volk/volk_64f_x2_min_64f_a16.h
+++ b/volk/include/volk/volk_64f_x2_min_64f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_64f_x2_min_64f_a16_H
-#define INCLUDED_volk_64f_x2_min_64f_a16_H
+#ifndef INCLUDED_volk_64f_x2_min_64f_a_H
+#define INCLUDED_volk_64f_x2_min_64f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_min_64f_a_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
@@ -53,7 +53,7 @@ static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* a
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_min_64f_a_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     double* cPtr = cVector;
     const double* aPtr = aVector;
     const double* bPtr=  bVector;
@@ -68,4 +68,4 @@ static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double
 #endif /* LV_HAVE_GENERIC */
 
 
-#endif /* INCLUDED_volk_64f_x2_min_64f_a16_H */
+#endif /* INCLUDED_volk_64f_x2_min_64f_a_H */
diff --git a/volk/include/volk/volk_64u_byteswap_a16.h b/volk/include/volk/volk_64u_byteswap_a.h
index b4bed8451..d4fc74a6e 100644
--- a/volk/include/volk/volk_64u_byteswap_a16.h
+++ b/volk/include/volk/volk_64u_byteswap_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_64u_byteswap_a16_H
-#define INCLUDED_volk_64u_byteswap_a16_H
+#ifndef INCLUDED_volk_64u_byteswap_a_H
+#define INCLUDED_volk_64u_byteswap_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int num_points){
+static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
     uint32_t* inputPtr = (uint32_t*)intsToSwap;
     __m128i input, byte1, byte2, byte3, byte4, output;
     __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
@@ -65,7 +65,7 @@ static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned int num_points){
+static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
   uint32_t* inputPtr = (uint32_t*)intsToSwap;
   unsigned int point;
   for(point = 0; point < num_points; point++){
@@ -85,4 +85,4 @@ static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned
 
 
 
-#endif /* INCLUDED_volk_64u_byteswap_a16_H */
+#endif /* INCLUDED_volk_64u_byteswap_a_H */
diff --git a/volk/include/volk/volk_64u_popcnt_a16.h b/volk/include/volk/volk_64u_popcnt_a.h
index 8b92e91a1..bdaa98643 100644
--- a/volk/include/volk/volk_64u_popcnt_a16.h
+++ b/volk/include/volk/volk_64u_popcnt_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_64u_popcnt_a16_H
-#define INCLUDED_volk_64u_popcnt_a16_H
+#ifndef INCLUDED_volk_64u_popcnt_a_H
+#define INCLUDED_volk_64u_popcnt_a_H
 
 #include <stdio.h>
 #include <inttypes.h>
@@ -8,7 +8,7 @@
 #ifdef LV_HAVE_GENERIC
 
 
-static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) {
+static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) {
 
   const uint32_t* valueVector = (const uint32_t*)&value;
   
@@ -40,11 +40,11 @@ static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t val
 
 #include <nmmintrin.h>
 
-static inline void volk_64u_popcnt_a16_sse4_2(uint64_t* ret, const uint64_t value) {
+static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value) {
   *ret = _mm_popcnt_u64(value);
 
 }
 
 #endif /*LV_HAVE_SSE4_2*/
 
-#endif /*INCLUDED_volk_64u_popcnt_a16_H*/
+#endif /*INCLUDED_volk_64u_popcnt_a_H*/
diff --git a/volk/include/volk/volk_8i_convert_16i_a16.h b/volk/include/volk/volk_8i_convert_16i_a.h
index 260ac40a1..9104f90cb 100644
--- a/volk/include/volk/volk_8i_convert_16i_a16.h
+++ b/volk/include/volk/volk_8i_convert_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8i_convert_16i_a16_H
-#define INCLUDED_volk_8i_convert_16i_a16_H
+#ifndef INCLUDED_volk_8i_convert_16i_a_H
+#define INCLUDED_volk_8i_convert_16i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param outputVector The 16 bit output data buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
 
@@ -54,7 +54,7 @@ static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const i
     \param outputVector The 16 bit output data buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
   int16_t* outputVectorPtr = outputVector;
   const int8_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -72,9 +72,9 @@ static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const
     \param outputVector The 16 bit output data buffer
     \param num_points The number of data values to be converted
   */
-extern void volk_8i_convert_16i_a16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points);
-static inline void volk_8i_convert_16i_a16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
-    volk_8i_convert_16i_a16_orc_impl(outputVector, inputVector, num_points);
+extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points);
+static inline void volk_8i_convert_16i_a_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+    volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a.h
index 9991b150e..7f2623ac6 100644
--- a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h
+++ b/volk/include/volk/volk_8i_s32f_convert_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8i_s32f_convert_32f_a16_H
-#define INCLUDED_volk_8i_s32f_convert_32f_a16_H
+#ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
+#define INCLUDED_volk_8i_s32f_convert_32f_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
     
@@ -74,7 +74,7 @@ static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, cons
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int8_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -94,10 +94,10 @@ static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, con
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-extern void volk_8i_s32f_convert_32f_a16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points);
-static inline void volk_8i_s32f_convert_32f_a16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points);
+static inline void volk_8i_s32f_convert_32f_a_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
     float invscalar = 1.0 / scalar;
-    volk_8i_s32f_convert_32f_a16_orc_impl(outputVector, inputVector, invscalar, num_points);
+    volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points);
 }
 #endif /* LV_HAVE_ORC */
 
diff --git a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h
index 249acab49..8f13da32f 100644
--- a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h
+++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H
-#define INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H
+#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
+#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -59,7 +59,7 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_16i_x2_a_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
   int16_t* qBufferPtr = qBuffer;
@@ -74,4 +74,4 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, in
 
 
 
-#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H */
+#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */
diff --git a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a.h
index 7b64b37c5..d26b3d0d0 100644
--- a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h
+++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a16_H
-#define INCLUDED_volk_8ic_deinterleave_real_16i_a16_H
+#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
+#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -49,7 +49,7 @@ static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, c
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -63,4 +63,4 @@ static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer,
 
 
 
-#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a16_H */
+#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
diff --git a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a.h
index a1abad487..21efed83e 100644
--- a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h
+++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a.h
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
@@ -50,7 +50,7 @@ static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, cons
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_8i_a_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h
index 7d778796e..b723c6f8b 100644
--- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
-#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
+#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
+#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -15,7 +15,7 @@
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
 
@@ -85,7 +85,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer,
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
 
@@ -146,7 +146,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -162,4 +162,4 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer,
 
 
 
-#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */
+#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H */
diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h
index a2e0cd8de..74073f5a6 100644
--- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
-#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
+#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
+#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -14,7 +14,7 @@
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -71,7 +71,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -116,7 +116,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   float* iBufferPtr = iBuffer;
@@ -131,4 +131,4 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffe
 
 
 
-#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */
+#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
index 7307ae484..0bb76f1d1 100644
--- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h
+++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H
-#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H
+#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
+#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -75,7 +75,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+static inline void volk_8ic_x2_multiply_conjugate_16ic_a_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
   unsigned int number = 0;
   int16_t* c16Ptr = (int16_t*)cVector;
   int8_t* a8Ptr = (int8_t*)aVector;
@@ -98,4 +98,4 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cV
 
 
 
-#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */
+#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
index adc7c0599..3e05608a4 100644
--- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h
+++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H
-#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H
+#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
+#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -95,7 +95,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   float* cPtr = (float*)cVector;
   const float invScalar = 1.0 / scalar;
@@ -119,4 +119,4 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_
 
 
 
-#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H */
+#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
index 5dd41be0f..33a478265 100644
--- a/volk/lib/CMakeLists.txt
+++ b/volk/lib/CMakeLists.txt
@@ -131,6 +131,7 @@ SET(volk_gen_sources
     ${CMAKE_BINARY_DIR}/include/volk/volk.h
     ${CMAKE_BINARY_DIR}/lib/volk.c
     ${CMAKE_BINARY_DIR}/lib/volk_init.h
+    ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h
     ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h
     ${CMAKE_BINARY_DIR}/lib/volk_cpu.c
     ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index db606a472..7f86dd78b 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -233,7 +233,8 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
 
 class volk_qa_aligned_mem_pool{
 public:
-    void *get_new(size_t size, size_t alignment = 32){
+    void *get_new(size_t size){
+        size_t alignment = volk_get_alignment();
         _mems.push_back(std::vector<char>(size + alignment-1, 0));
         size_t ptr = size_t(&_mems.back().front());
         return (void *)((ptr + alignment-1) & ~(alignment-1));
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 349fb0630..62e62c2f4 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -2,92 +2,92 @@
 #include <volk/volk.h>
 #include <boost/test/unit_test.hpp>
 
-//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
-//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100);
-VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000);
-//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000);
-//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50);
-VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100);
-//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100);
-VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000);
-//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100);
-VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000);
-//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000);
-VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100);
-VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100);
-VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000);
-VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000);
-VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000);
-VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 204600, 2000);
-//VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 204600, 1000);
-//VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400);
-VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400);
-VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 204600, 20000);
-VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000);
+//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000);
+//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 20460, 1000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 20460, 100);
+VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 20460, 1000);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 20460, 10000);
+//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 1000);
+//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 1000);
+VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 50);
+VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 100);
+//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 100);
+VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 20460, 10000);
+//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 2046, 100);
+VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 20460, 5000);
+//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 0, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 3000);
+VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 100);
+VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 100);
+VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 20460, 3000);
+VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 20460, 10000);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 20460, 10000);
+VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 20460, 2000);
+//VOLK_RUN_TESTS(volk_32u_popcnt_a, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 20460, 1000);
+//VOLK_RUN_TESTS(volk_64u_popcnt_a, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 20460, 400);
+VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 20460, 400);
+VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 20000);
+VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 2000);
 
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
index ebfe3bc40..9743c51d9 100644
--- a/volk/lib/volk_prefs.c
+++ b/volk/lib/volk_prefs.c
@@ -13,7 +13,7 @@ void get_config_path(char *path) {
     strcat(path, suffix);
 }
 
-//passing by reference in C can suck my balls
+//passing by reference in C can (***********)
 int load_preferences(struct volk_arch_pref **prefs) {
     FILE *config_file;
     char path[512], line[512], function[128], arch[32];
diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am
index 960d09b6a..814471bd0 100644
--- a/volk/orc/Makefile.am
+++ b/volk/orc/Makefile.am
@@ -25,27 +25,27 @@ noinst_LTLIBRARIES = libvolk_orc.la
 libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS)
 
 libvolk_orc_la_SOURCES = \
-volk_8i_convert_16i_a16_orc_impl.orc \
-volk_8i_s32f_convert_32f_a16_orc_impl.orc \
-volk_16u_byteswap_a16_orc_impl.orc \
-volk_32i_x2_and_32i_a16_orc_impl.orc \
-volk_32i_x2_or_32i_a16_orc_impl.orc \
-volk_32f_x2_add_32f_a16_orc_impl.orc \
-volk_32f_x2_subtract_32f_a16_orc_impl.orc \
-volk_32f_x2_divide_32f_a16_orc_impl.orc \
-volk_32f_x2_multiply_32f_a16_orc_impl.orc \
-volk_32fc_x2_multiply_32fc_a16_orc_impl.orc \
-volk_32fc_32f_multiply_32fc_a16_orc_impl.orc \
-volk_32f_sqrt_32f_a16_orc_impl.orc \
-volk_32f_x2_max_32f_a16_orc_impl.orc \
-volk_32f_x2_min_32f_a16_orc_impl.orc \
-volk_32f_s32f_normalize_a16_orc_impl.orc \
-volk_32fc_magnitude_32f_a16_orc_impl.orc \
-volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc \
-volk_16ic_magnitude_16i_a16_orc_impl.orc \
-volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc \
-volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc \
-volk_16ic_deinterleave_real_8i_a16_orc_impl.orc
+volk_8i_convert_16i_a_orc_impl.orc \
+volk_8i_s32f_convert_32f_a_orc_impl.orc \
+volk_16u_byteswap_a_orc_impl.orc \
+volk_32i_x2_and_32i_a_orc_impl.orc \
+volk_32i_x2_or_32i_a_orc_impl.orc \
+volk_32f_x2_add_32f_a_orc_impl.orc \
+volk_32f_x2_subtract_32f_a_orc_impl.orc \
+volk_32f_x2_divide_32f_a_orc_impl.orc \
+volk_32f_x2_multiply_32f_a_orc_impl.orc \
+volk_32fc_x2_multiply_32fc_a_orc_impl.orc \
+volk_32fc_32f_multiply_32fc_a_orc_impl.orc \
+volk_32f_sqrt_32f_a_orc_impl.orc \
+volk_32f_x2_max_32f_a_orc_impl.orc \
+volk_32f_x2_min_32f_a_orc_impl.orc \
+volk_32f_s32f_normalize_a_orc_impl.orc \
+volk_32fc_magnitude_32f_a_orc_impl.orc \
+volk_32fc_s32f_magnitude_16i_a_orc_impl.orc \
+volk_16ic_magnitude_16i_a_orc_impl.orc \
+volk_16ic_deinterleave_16i_x2_a_orc_impl.orc \
+volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc \
+volk_16ic_deinterleave_real_8i_a_orc_impl.orc
 
 
 
diff --git a/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc
index 0189fbf5d..fd8915da0 100644
--- a/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc
+++ b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl
+.function volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl
 .dest 4 idst
 .dest 4 qdst
 .source 4 src
diff --git a/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc
index 56018edda..76faa936a 100644
--- a/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc
+++ b/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_16ic_deinterleave_16i_x2_a16_orc_impl
+.function volk_16ic_deinterleave_16i_x2_a_orc_impl
 .dest 2 idst
 .dest 2 qdst
 .source 4 src
diff --git a/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc
index dba9a4c8e..8db49fd7c 100644
--- a/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc
+++ b/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_16ic_deinterleave_real_8i_a16_orc_impl
+.function volk_16ic_deinterleave_real_8i_a_orc_impl
 .dest 1 dst
 .source 4 src
 .temp 2 iw
diff --git a/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc
index 37225e9b8..fbaebc46d 100644
--- a/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc
+++ b/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_16ic_magnitude_16i_a16_orc_impl
+.function volk_16ic_magnitude_16i_a_orc_impl
 .source 4 src
 .dest 2 dst
 .floatparam 4 scalar
diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
index 1e2380837..66fef7d2e 100644
--- a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
+++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_16ic_magnitude_32f_a16_orc_impl
+.function volk_16ic_magnitude_32f_a_orc_impl
 .source 4 src
 .dest 4 dst
 .floatparam 4 scalar
diff --git a/volk/orc/volk_16u_byteswap_a16_orc_impl.orc b/volk/orc/volk_16u_byteswap_a16_orc_impl.orc
deleted file mode 100644
index c1c8ee59e..000000000
--- a/volk/orc/volk_16u_byteswap_a16_orc_impl.orc
+++ /dev/null
@@ -1,3 +0,0 @@
-.function volk_16u_byteswap_a16_orc_impl
-.dest 2 dst
-swapw dst, dst
diff --git a/volk/orc/volk_16u_byteswap_a_orc_impl.orc b/volk/orc/volk_16u_byteswap_a_orc_impl.orc
new file mode 100644
index 000000000..b96ba84af
--- /dev/null
+++ b/volk/orc/volk_16u_byteswap_a_orc_impl.orc
@@ -0,0 +1,3 @@
+.function volk_16u_byteswap_a_orc_impl
+.dest 2 dst
+swapw dst, dst
diff --git a/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc b/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc
index acd319b16..986fdf665 100644
--- a/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_s32f_normalize_a16_orc_impl
+.function volk_32f_s32f_normalize_a_orc_impl
 .source 4 src1
 .floatparam 4 invscalar
 .dest 4 dst
diff --git a/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc b/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc
index ae5680f15..f339b1122 100644
--- a/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_sqrt_32f_a16_orc_impl
+.function volk_32f_sqrt_32f_a_orc_impl
 .source 4 src
 .dest 4 dst
 sqrtf dst, src
diff --git a/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc
index 8d095a052..450cc6a9e 100644
--- a/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_add_32f_a16_orc_impl
+.function volk_32f_x2_add_32f_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc
index 0097646cb..ee3b61b82 100644
--- a/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_divide_32f_a16_orc_impl
+.function volk_32f_x2_divide_32f_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32f_x2_dot_prod_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc
index d92afbe01..b367f3091 100644
--- a/volk/orc/volk_32f_x2_dot_prod_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_dot_prod_32f_a16_orc_impl
+.function volk_32f_x2_dot_prod_32f_a_orc_impl
 .source 4 src1
 .source 4 src2
 .dest 4 dst
diff --git a/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc
index b7f008737..725201633 100644
--- a/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_max_32f_a16_orc_impl
+.function volk_32f_x2_max_32f_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc
index 78328b576..a71ed8250 100644
--- a/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_min_32f_a16_orc_impl
+.function volk_32f_x2_min_32f_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc
index e8fadff19..c17d539fd 100644
--- a/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_multiply_32f_a16_orc_impl
+.function volk_32f_x2_multiply_32f_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc
index 13fbe8c83..b3b0f256e 100644
--- a/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32f_x2_subtract_32f_a16_orc_impl
+.function volk_32f_x2_subtract_32f_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc
index 455293cff..aa82699f5 100644
--- a/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc
+++ b/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32fc_32f_multiply_32fc_a16_orc_impl
+.function volk_32fc_32f_multiply_32fc_a_orc_impl
 .source 8 src1
 .source 4 src2
 .dest 8 dst
diff --git a/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc
index c5e2e57f1..032ab2b1b 100644
--- a/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32fc_magnitude_32f_a16_orc_impl
+.function volk_32fc_magnitude_32f_a_orc_impl
 .source 8 src
 .dest 4 dst
 .temp 8 iqf
diff --git a/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc
index 6116f5e1f..505e73f5d 100644
--- a/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc
+++ b/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32fc_s32f_magnitude_16i_a16_orc_impl
+.function volk_32fc_s32f_magnitude_16i_a_orc_impl
 .source 8 src
 .dest 2 dst
 .floatparam 4 scalar
diff --git a/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc
index a27d722cd..cb8a12d81 100644
--- a/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc
+++ b/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32fc_x2_multiply_32fc_a16_orc_impl
+.function volk_32fc_x2_multiply_32fc_a_orc_impl
 .source 8 src1
 .source 8 src2
 .dest 8 dst
diff --git a/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc
index 7b331f8ed..1845e4654 100644
--- a/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc
+++ b/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32i_x2_and_32i_a16_orc_impl
+.function volk_32i_x2_and_32i_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc
index 4984a9ced..004663f42 100644
--- a/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc
+++ b/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_32i_x2_or_32i_a16_orc_impl
+.function volk_32i_x2_or_32i_a_orc_impl
 .dest 4 dst
 .source 4 src1
 .source 4 src2
diff --git a/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc b/volk/orc/volk_8i_convert_16i_a_orc_impl.orc
index f44845c88..d813c6cfa 100644
--- a/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc
+++ b/volk/orc/volk_8i_convert_16i_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_8i_convert_16i_a16_orc_impl
+.function volk_8i_convert_16i_a_orc_impl
 .source 1 src
 .dest 2 dst
 convsbw dst, src
diff --git a/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc b/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc
index 8f6e157e9..ad54fb1e1 100644
--- a/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc
+++ b/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc
@@ -1,4 +1,4 @@
-.function volk_8i_s32f_convert_32f_a16_orc_impl
+.function volk_8i_s32f_convert_32f_a_orc_impl
 .source 1 src
 .dest 4 dst
 .floatparam 4 scalar