diff options
author | Tom Rondeau | 2011-02-01 23:19:28 -0500 |
---|---|---|
committer | Tom Rondeau | 2011-02-01 23:19:28 -0500 |
commit | 800686701206e438e5a5d645242137f9285c4fa9 (patch) | |
tree | a95ad9bac09e3a6cec43f741cc3eab17771a2fe2 /volk/include | |
parent | 023167ca8a85ab597f9e59302733f71809a8afbd (diff) | |
parent | 6503e3b21978b71908400c994148836bec4a97b9 (diff) | |
download | gnuradio-800686701206e438e5a5d645242137f9285c4fa9.tar.gz gnuradio-800686701206e438e5a5d645242137f9285c4fa9.tar.bz2 gnuradio-800686701206e438e5a5d645242137f9285c4fa9.zip |
Merge branch 'volk_rename' into next
Conflicts:
volk/include/volk/Makefile.am
volk/lib/Makefile.am
volk/lib/qa_volk.cc
Diffstat (limited to 'volk/include')
-rw-r--r-- | volk/include/volk/Makefile.am | 175 | ||||
-rw-r--r-- | volk/include/volk/archs.xml | 6 | ||||
-rw-r--r-- | volk/include/volk/make_c.py | 1 | ||||
-rw-r--r-- | volk/include/volk/make_set_simd.py | 55 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_branch_4_state_8_a16.h (renamed from volk/include/volk/volk_16s_branch_4_state_8_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_convert_8i_a16.h (renamed from volk/include/volk/volk_16s_convert_8s_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_convert_8i_u.h (renamed from volk/include/volk/volk_16s_convert_8s_unaligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_max_star_16i_a16.h (renamed from volk/include/volk/volk_16s_max_star_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h (renamed from volk/include/volk/volk_16s_max_star_horizontal_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_permute_and_scalar_add_a16.h (renamed from volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_s32f_convert_32f_a16.h (renamed from volk/include/volk/volk_16s_convert_32f_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_s32f_convert_32f_u.h (renamed from volk/include/volk/volk_16s_convert_32f_unaligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h (renamed from volk/include/volk/volk_16s_quad_max_star_aligned16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h (renamed from volk/include/volk/volk_16s_add_quad_aligned16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h (renamed from volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h (renamed from volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h (renamed from volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h) | 29 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_magnitude_16i_a16.h (renamed from volk/include/volk/volk_16sc_magnitude_16s_aligned16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h (renamed from volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h (renamed from volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h (renamed from volk/include/volk/volk_16sc_magnitude_32f_aligned16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_16u_byteswap_a16.h (renamed from volk/include/volk/volk_16u_byteswap_aligned16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_accumulator_s32f_a16.h (renamed from volk/include/volk/volk_32f_accumulator_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_convert_64f_a16.h (renamed from volk/include/volk/volk_32f_convert_64f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_convert_64f_u.h (renamed from volk/include/volk/volk_32f_convert_64f_unaligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_index_max_16u_a16.h (renamed from volk/include/volk/volk_32f_index_max_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h (renamed from volk/include/volk/volk_32f_fm_detect_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h (renamed from volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_16i_a16.h (renamed from volk/include/volk/volk_32f_convert_16s_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_16i_u.h (renamed from volk/include/volk/volk_32f_convert_16s_unaligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_32i_a16.h (renamed from volk/include/volk/volk_32f_convert_32s_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_32i_u.h (renamed from volk/include/volk/volk_32f_convert_32s_unaligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_8i_a16.h (renamed from volk/include/volk/volk_32f_convert_8s_aligned16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_8i_u.h (renamed from volk/include/volk/volk_32f_convert_8s_unaligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_normalize_a16.h (renamed from volk/include/volk/volk_32f_normalize_aligned16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_power_32f_a16.h (renamed from volk/include/volk/volk_32f_power_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_stddev_32f_a16.h (renamed from volk/include/volk/volk_32f_stddev_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_sqrt_32f_a16.h (renamed from volk/include/volk/volk_32f_sqrt_aligned16.h) | 23 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h (renamed from volk/include/volk/volk_32f_stddev_and_mean_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_add_32f_a16.h (renamed from volk/include/volk/volk_32f_add_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_divide_32f_a16.h (renamed from volk/include/volk/volk_32f_divide_aligned16.h) | 23 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h (renamed from volk/include/volk/volk_32f_dot_prod_aligned16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_dot_prod_32f_u.h (renamed from volk/include/volk/volk_32f_dot_prod_unaligned16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_interleave_32fc_a16.h (renamed from volk/include/volk/volk_32f_interleave_32fc_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_max_32f_a16.h (renamed from volk/include/volk/volk_32f_max_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_min_32f_a16.h (renamed from volk/include/volk/volk_32f_min_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_multiply_32f_a16.h (renamed from volk/include/volk/volk_32f_multiply_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h (renamed from volk/include/volk/volk_32f_interleave_16sc_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_subtract_32f_a16.h (renamed from volk/include/volk/volk_32f_subtract_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h (renamed from volk/include/volk/volk_32f_sum_of_poly_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h (renamed from volk/include/volk/volk_32fc_32f_multiply_aligned16.h) | 23 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h (renamed from volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h (renamed from volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_index_max_16u_a16.h (renamed from volk/include/volk/volk_32fc_index_max_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_magnitude_32f_a16.h (renamed from volk/include/volk/volk_32fc_magnitude_32f_aligned16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h (renamed from volk/include/volk/volk_32fc_atan2_32f_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h (renamed from volk/include/volk/volk_32fc_magnitude_16s_aligned16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_power_32fc_a16.h (renamed from volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h (renamed from volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h (renamed from volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h (renamed from volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h (renamed from volk/include/volk/volk_32fc_dot_prod_aligned16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h (renamed from volk/include/volk/volk_32fc_multiply_aligned16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h (renamed from volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h (renamed from volk/include/volk/volk_32fc_square_dist_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_s32f_convert_32f_a16.h (renamed from volk/include/volk/volk_32s_convert_32f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_s32f_convert_32f_u.h (renamed from volk/include/volk/volk_32s_convert_32f_unaligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_x2_and_32i_a16.h (renamed from volk/include/volk/volk_32s_and_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_x2_or_32i_a16.h (renamed from volk/include/volk/volk_32s_or_aligned16.h) | 24 | ||||
-rw-r--r-- | volk/include/volk/volk_32u_byteswap_a16.h (renamed from volk/include/volk/volk_32u_byteswap_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_32u_popcnt_a16.h (renamed from volk/include/volk/volk_32u_popcnt_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_convert_32f_a16.h (renamed from volk/include/volk/volk_64f_convert_32f_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_convert_32f_u.h (renamed from volk/include/volk/volk_64f_convert_32f_unaligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_x2_max_64f_a16.h (renamed from volk/include/volk/volk_64f_max_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_x2_min_64f_a16.h (renamed from volk/include/volk/volk_64f_min_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_64u_byteswap_a16.h (renamed from volk/include/volk/volk_64u_byteswap_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_64u_popcnt_a16.h (renamed from volk/include/volk/volk_64u_popcnt_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_convert_16i_a16.h (renamed from volk/include/volk/volk_8s_convert_16s_aligned16.h) | 20 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_convert_16i_u.h (renamed from volk/include/volk/volk_8s_convert_16s_unaligned16.h) | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_s32f_convert_32f_a16.h (renamed from volk/include/volk/volk_8s_convert_32f_aligned16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_s32f_convert_32f_u.h (renamed from volk/include/volk/volk_8s_convert_32f_unaligned16.h) | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h (renamed from volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h (renamed from volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h (renamed from volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h) | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h (renamed from volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h (renamed from volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h (renamed from volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h) | 10 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h (renamed from volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h) | 10 | ||||
-rwxr-xr-x | volk/include/volk/volk_register.py | 5 |
92 files changed, 911 insertions, 579 deletions
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 00289be1e..eb97775b0 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -41,94 +41,93 @@ volkinclude_HEADERS = \ volk.h \ volk_cpu.h \ volk_environment_init.h \ - volk_16s_add_quad_aligned16.h \ - volk_16s_branch_4_state_8_aligned16.h \ - volk_16sc_deinterleave_16s_aligned16.h \ - volk_16sc_deinterleave_32f_aligned16.h \ - volk_16sc_deinterleave_real_16s_aligned16.h \ - volk_16sc_deinterleave_real_32f_aligned16.h \ - volk_16sc_deinterleave_real_8s_aligned16.h \ - volk_16sc_magnitude_16s_aligned16.h \ - volk_16sc_magnitude_32f_aligned16.h \ - volk_16s_convert_32f_aligned16.h \ - volk_16s_convert_32f_unaligned16.h \ - volk_16s_convert_8s_aligned16.h \ - volk_16s_convert_8s_unaligned16.h \ - volk_16s_max_star_aligned16.h \ - volk_16s_max_star_horizontal_aligned16.h \ - volk_16s_permute_and_scalar_add_aligned16.h \ - volk_16s_quad_max_star_aligned16.h \ - volk_16u_byteswap_aligned16.h \ - volk_32f_accumulator_aligned16.h \ - volk_32f_add_aligned16.h \ - volk_32fc_32f_multiply_aligned16.h \ - volk_32fc_32f_power_32fc_aligned16.h \ - volk_32f_calc_spectral_noise_floor_aligned16.h \ - volk_32fc_atan2_32f_aligned16.h \ - volk_32fc_conjugate_dot_prod_aligned16.h \ - volk_32fc_conjugate_dot_prod_unaligned.h \ - volk_32fc_deinterleave_32f_aligned16.h \ - volk_32fc_deinterleave_64f_aligned16.h \ - volk_32fc_deinterleave_real_16s_aligned16.h \ - volk_32fc_deinterleave_real_32f_aligned16.h \ - volk_32fc_deinterleave_real_64f_aligned16.h \ - volk_32fc_dot_prod_aligned16.h \ - volk_32fc_index_max_aligned16.h \ - volk_32fc_magnitude_16s_aligned16.h \ - volk_32fc_magnitude_32f_aligned16.h \ - volk_32fc_multiply_aligned16.h \ - volk_32f_convert_16s_aligned16.h \ - volk_32f_convert_16s_unaligned16.h \ - volk_32f_convert_32s_aligned16.h \ - volk_32f_convert_32s_unaligned16.h \ - volk_32f_convert_64f_aligned16.h \ - volk_32f_convert_64f_unaligned16.h \ - 
volk_32f_convert_8s_aligned16.h \ - volk_32f_convert_8s_unaligned16.h \ - volk_32fc_power_spectral_density_32f_aligned16.h \ - volk_32fc_power_spectrum_32f_aligned16.h \ - volk_32fc_square_dist_aligned16.h \ - volk_32fc_square_dist_scalar_mult_aligned16.h \ - volk_32f_divide_aligned16.h \ - volk_32f_dot_prod_aligned16.h \ - volk_32f_dot_prod_unaligned16.h \ - volk_32f_fm_detect_aligned16.h \ - volk_32f_index_max_aligned16.h \ - volk_32f_interleave_16sc_aligned16.h \ - volk_32f_interleave_32fc_aligned16.h \ - volk_32f_max_aligned16.h \ - volk_32f_min_aligned16.h \ - volk_32f_multiply_aligned16.h \ - volk_32f_normalize_aligned16.h \ - volk_32f_power_aligned16.h \ - volk_32f_sqrt_aligned16.h \ - volk_32f_stddev_aligned16.h \ - volk_32f_stddev_and_mean_aligned16.h \ - volk_32f_subtract_aligned16.h \ - volk_32f_sum_of_poly_aligned16.h \ - volk_32s_and_aligned16.h \ - volk_32s_convert_32f_aligned16.h \ - volk_32s_convert_32f_unaligned16.h \ - volk_32s_or_aligned16.h \ - volk_32u_byteswap_aligned16.h \ - volk_32u_popcnt_aligned16.h \ - volk_64f_convert_32f_aligned16.h \ - volk_64f_convert_32f_unaligned16.h \ - volk_64f_max_aligned16.h \ - volk_64f_min_aligned16.h \ - volk_64u_byteswap_aligned16.h \ - volk_64u_popcnt_aligned16.h \ - volk_8sc_deinterleave_16s_aligned16.h \ - volk_8sc_deinterleave_32f_aligned16.h \ - volk_8sc_deinterleave_real_16s_aligned16.h \ - volk_8sc_deinterleave_real_32f_aligned16.h \ - volk_8sc_deinterleave_real_8s_aligned16.h \ - volk_8sc_multiply_conjugate_16sc_aligned16.h \ - volk_8sc_multiply_conjugate_32fc_aligned16.h \ - volk_8s_convert_16s_aligned16.h \ - volk_8s_convert_16s_unaligned16.h \ - volk_8s_convert_32f_aligned16.h \ - volk_8s_convert_32f_unaligned16.h + volk_16i_x5_add_quad_16i_x4_a16.h \ + volk_16i_branch_4_state_8_a16.h \ + volk_16ic_deinterleave_16i_x2_a16.h \ + volk_16ic_s32f_deinterleave_32f_x2_a16.h \ + volk_16ic_deinterleave_real_16i_a16.h \ + volk_16ic_s32f_deinterleave_real_32f_a16.h \ + volk_16ic_deinterleave_real_8i_a16.h \ 
+ volk_16ic_magnitude_16i_a16.h \ + volk_16ic_s32f_magnitude_32f_a16.h \ + volk_16i_s32f_convert_32f_a16.h \ + volk_16i_s32f_convert_32f_u.h \ + volk_16i_convert_8i_a16.h \ + volk_16i_convert_8i_u.h \ + volk_16i_max_star_16i_a16.h \ + volk_16i_max_star_horizontal_16i_a16.h \ + volk_16i_permute_and_scalar_add_a16.h \ + volk_16i_x4_quad_max_star_16i_a16.h \ + volk_16u_byteswap_a16.h \ + volk_32f_accumulator_s32f_a16.h \ + volk_32f_x2_add_32f_a16.h \ + volk_32fc_32f_multiply_32fc_a16.h \ + volk_32fc_s32f_power_32fc_a16.h \ + volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h \ + volk_32fc_s32f_atan2_32f_a16.h \ + volk_32fc_x2_conjugate_dot_prod_32fc_a16.h \ + volk_32fc_deinterleave_32f_x2_a16.h \ + volk_32fc_deinterleave_64f_x2_a16.h \ + volk_32fc_s32f_deinterleave_real_16i_a16.h \ + volk_32fc_deinterleave_real_32f_a16.h \ + volk_32fc_deinterleave_real_64f_a16.h \ + volk_32fc_x2_dot_prod_32fc_a16.h \ + volk_32fc_index_max_16u_a16.h \ + volk_32fc_s32f_magnitude_16i_a16.h \ + volk_32fc_magnitude_32f_a16.h \ + volk_32fc_x2_multiply_32fc_a16.h \ + volk_32f_s32f_convert_16i_a16.h \ + volk_32f_s32f_convert_16i_u.h \ + volk_32f_s32f_convert_32i_a16.h \ + volk_32f_s32f_convert_32i_u.h \ + volk_32f_convert_64f_a16.h \ + volk_32f_convert_64f_u.h \ + volk_32f_s32f_convert_8i_a16.h \ + volk_32f_s32f_convert_8i_u.h \ + volk_32fc_s32f_x2_power_spectral_density_32f_a16.h \ + volk_32fc_s32f_power_spectrum_32f_a16.h \ + volk_32fc_x2_square_dist_32f_a16.h \ + volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h \ + volk_32f_x2_divide_32f_a16.h \ + volk_32f_x2_dot_prod_32f_a16.h \ + volk_32f_x2_dot_prod_32f_u.h \ + volk_32f_s32f_32f_fm_detect_32f_a16.h \ + volk_32f_index_max_16u_a16.h \ + volk_32f_x2_s32f_interleave_16ic_a16.h \ + volk_32f_x2_interleave_32fc_a16.h \ + volk_32f_x2_max_32f_a16.h \ + volk_32f_x2_min_32f_a16.h \ + volk_32f_x2_multiply_32f_a16.h \ + volk_32f_s32f_normalize_a16.h \ + volk_32f_s32f_power_32f_a16.h \ + volk_32f_sqrt_32f_a16.h \ + volk_32f_s32f_stddev_32f_a16.h \ 
+ volk_32f_stddev_and_mean_32f_x2_a16.h \ + volk_32f_x2_subtract_32f_a16.h \ + volk_32f_x3_sum_of_poly_32f_a16.h \ + volk_32i_x2_and_32i_a16.h \ + volk_32i_s32f_convert_32f_a16.h \ + volk_32i_s32f_convert_32f_u.h \ + volk_32i_x2_or_32i_a16.h \ + volk_32u_byteswap_a16.h \ + volk_32u_popcnt_a16.h \ + volk_64f_convert_32f_a16.h \ + volk_64f_convert_32f_u.h \ + volk_64f_x2_max_64f_a16.h \ + volk_64f_x2_min_64f_a16.h \ + volk_64u_byteswap_a16.h \ + volk_64u_popcnt_a16.h \ + volk_8ic_deinterleave_16i_x2_a16.h \ + volk_8ic_s32f_deinterleave_32f_x2_a16.h \ + volk_8ic_deinterleave_real_16i_a16.h \ + volk_8ic_s32f_deinterleave_real_32f_a16.h \ + volk_8ic_deinterleave_real_8i_a16.h \ + volk_8ic_x2_multiply_conjugate_16ic_a16.h \ + volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h \ + volk_8i_convert_16i_a16.h \ + volk_8i_convert_16i_u.h \ + volk_8i_s32f_convert_32f_a16.h \ + volk_8i_s32f_convert_32f_u.h VOLK_MKTABLES_SOURCES = \ $(platform_CODE) \ diff --git a/volk/include/volk/archs.xml b/volk/include/volk/archs.xml index b7c98500f..a19a5add9 100644 --- a/volk/include/volk/archs.xml +++ b/volk/include/volk/archs.xml @@ -5,6 +5,12 @@ <flag>none</flag> </arch> +<arch name="orc" type="all"> + <flag>lorc-0.4</flag> + <overrule>LV_HAVE_ORC</overrule> + <overrule_val>no</overrule_val> +</arch> + <arch name="altivec" type="powerpc"> <flag>maltivec</flag> </arch> diff --git a/volk/include/volk/make_c.py b/volk/include/volk/make_c.py index f2432d7a4..6e75067d0 100644 --- a/volk/include/volk/make_c.py +++ b/volk/include/volk/make_c.py @@ -25,7 +25,6 @@ def make_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : tempstring = tempstring + " return 0;\n" tempstring = tempstring + "}\n" - for i in range(len(funclist)): tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py index 275d3869f..c74b0464d 100644 --- 
a/volk/include/volk/make_set_simd.py +++ b/volk/include/volk/make_set_simd.py @@ -95,7 +95,7 @@ def make_set_simd(dom) : arch = str(domarch.attributes["name"].value); tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; tempstring = tempstring + " ADDONS=\"\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"generic\"\n"; + tempstring = tempstring + " BUILT_ARCHS=\"\"\n"; tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; @@ -165,8 +165,22 @@ def make_set_simd(dom) : tempstring = tempstring + " indCXX=no\n" tempstring = tempstring + " indLV_ARCH=no\n" elif atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " ;;\n" tempstring = tempstring + " (powerpc)\n" @@ -210,14 +224,49 @@ def make_set_simd(dom) : tempstring = tempstring + " indCXX=no\n" tempstring = tempstring + " indLV_ARCH=no\n" elif atype == "all": + tempstring = tempstring + " for i 
in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; + tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " ;;\n" + tempstring = tempstring + " (*)\n" + for domarch in dom: + arch = str(domarch.attributes["name"].value); + atype = str(domarch.attributes["type"].value); + flag = domarch.getElementsByTagName("flag"); + flag = str(flag[0].firstChild.data); + if atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; 
tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" tempstring = tempstring + " ;;\n" tempstring = tempstring + " esac\n" tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" tempstring = tempstring + "])\n" return tempstring; - - + diff --git a/volk/include/volk/volk_16s_branch_4_state_8_aligned16.h b/volk/include/volk/volk_16i_branch_4_state_8_a16.h index fb9d7cb87..3437c1a6b 100644 --- a/volk/include/volk/volk_16s_branch_4_state_8_aligned16.h +++ b/volk/include/volk/volk_16i_branch_4_state_8_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H -#define INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H +#ifndef INCLUDED_volk_16i_branch_4_state_8_a16_H +#define INCLUDED_volk_16i_branch_4_state_8_a16_H #include<inttypes.h> @@ -14,7 +14,7 @@ #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16s_branch_4_state_8_aligned16_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11; @@ -138,7 +138,7 @@ static inline void volk_16s_branch_4_state_8_aligned16_ssse3(short* target, sh #endif /*LV_HAVE_SSEs*/ #if LV_HAVE_GENERIC -static inline void volk_16s_branch_4_state_8_aligned16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { int i = 0; int bound = 4; @@ -191,4 +191,4 @@ static inline void volk_16s_branch_4_state_8_aligned16_generic(short* target, #endif /*LV_HAVE_GENERIC*/ 
-#endif /*INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_branch_4_state_8_a16_H*/ diff --git a/volk/include/volk/volk_16s_convert_8s_aligned16.h b/volk/include/volk/volk_16i_convert_8i_a16.h index 64c368688..73e45ad63 100644 --- a/volk/include/volk/volk_16s_convert_8s_aligned16.h +++ b/volk/include/volk/volk_16i_convert_8i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H +#ifndef INCLUDED_volk_16i_convert_8i_a16_H +#define INCLUDED_volk_16i_convert_8i_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_8s_aligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -52,7 +52,7 @@ static inline void volk_16s_convert_8s_aligned16_sse2(int8_t* outputVector, cons \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_8s_aligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -66,4 +66,4 @@ static inline void volk_16s_convert_8s_aligned16_generic(int8_t* outputVector, c -#endif /* INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16i_convert_8i_a16_H */ diff --git a/volk/include/volk/volk_16s_convert_8s_unaligned16.h b/volk/include/volk/volk_16i_convert_8i_u.h index ca925de86..5fc792b56 100644 
--- a/volk/include/volk/volk_16s_convert_8s_unaligned16.h +++ b/volk/include/volk/volk_16i_convert_8i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H +#ifndef INCLUDED_volk_16i_convert_8i_u_H +#define INCLUDED_volk_16i_convert_8i_u_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -54,7 +54,7 @@ static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, co \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_u_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -68,4 +68,4 @@ static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector, -#endif /* INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H */ +#endif /* INCLUDED_volk_16i_convert_8i_u_H */ diff --git a/volk/include/volk/volk_16s_max_star_aligned16.h b/volk/include/volk/volk_16i_max_star_16i_a16.h index ba4e979ec..ff57bd2a1 100644 --- a/volk/include/volk/volk_16s_max_star_aligned16.h +++ b/volk/include/volk/volk_16i_max_star_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H -#define INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H 
+#ifndef INCLUDED_volk_16i_max_star_16i_a16_H +#define INCLUDED_volk_16i_max_star_16i_a16_H #include<inttypes.h> @@ -12,7 +12,7 @@ #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16s_max_star_aligned16_ssse3(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, unsigned int num_bytes) { @@ -87,7 +87,7 @@ static inline void volk_16s_max_star_aligned16_ssse3(short* target, short* src0 #if LV_HAVE_GENERIC -static inline void volk_16s_max_star_aligned16_generic(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) { int i = 0; @@ -105,4 +105,4 @@ static inline void volk_16s_max_star_aligned16_generic(short* target, short* src #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_max_star_16i_a16_H*/ diff --git a/volk/include/volk/volk_16s_max_star_horizontal_aligned16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h index 82d011677..695e08dbf 100644 --- a/volk/include/volk/volk_16s_max_star_horizontal_aligned16.h +++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H -#define INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H +#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a16_H +#define INCLUDED_volk_16i_max_star_horizontal_16i_a16_H #include<inttypes.h> @@ -12,7 +12,7 @@ #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16s_max_star_horizontal_aligned16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const static uint8_t 
shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d}; @@ -110,7 +110,7 @@ static inline void volk_16s_max_star_horizontal_aligned16_ssse3(int16_t* target #if LV_HAVE_GENERIC -static inline void volk_16s_max_star_horizontal_aligned16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { int i = 0; @@ -127,4 +127,4 @@ static inline void volk_16s_max_star_horizontal_aligned16_generic(int16_t* targe #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a16_H*/ diff --git a/volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h index 452d05c4f..e52a949fb 100644 --- a/volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h +++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H -#define INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H +#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a16_H +#define INCLUDED_volk_16i_permute_and_scalar_add_a16_H #include<inttypes.h> @@ -13,7 +13,7 @@ #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16s_permute_and_scalar_add_aligned16_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -117,7 +117,7 @@ static inline void volk_16s_permute_and_scalar_add_aligned16_sse2(short* target #if LV_HAVE_GENERIC -static inline void 
volk_16s_permute_and_scalar_add_aligned16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { int i = 0; @@ -136,4 +136,4 @@ static inline void volk_16s_permute_and_scalar_add_aligned16_generic(short* targ #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a16_H*/ diff --git a/volk/include/volk/volk_16s_convert_32f_aligned16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h index 126ce1528..83fd26ff9 100644 --- a/volk/include/volk/volk_16s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_16i_s32f_convert_32f_a16_H +#define INCLUDED_volk_16i_s32f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_32f_aligned16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -68,7 +68,7 @@ static inline void volk_16s_convert_32f_aligned16_sse4_1(float* outputVector, co \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void 
volk_16s_convert_32f_aligned16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -102,7 +102,7 @@ static inline void volk_16s_convert_32f_aligned16_sse(float* outputVector, const \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_32f_aligned16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -116,4 +116,4 @@ static inline void volk_16s_convert_32f_aligned16_generic(float* outputVector, c -#endif /* INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_16i_s32f_convert_32f_a16_H */ diff --git a/volk/include/volk/volk_16s_convert_32f_unaligned16.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h index d6212fba5..8f0dd0083 100644 --- a/volk/include/volk/volk_16s_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H +#define INCLUDED_volk_16i_s32f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -15,7 +15,7 @@ \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_16s_convert_32f_unaligned16_sse4_1(float* outputVector, const int16_t* inputVector, const float 
scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -70,7 +70,7 @@ static inline void volk_16s_convert_32f_unaligned16_sse4_1(float* outputVector, \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_16s_convert_32f_unaligned16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -105,7 +105,7 @@ static inline void volk_16s_convert_32f_unaligned16_sse(float* outputVector, con \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_16s_convert_32f_unaligned16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_u_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -119,4 +119,4 @@ static inline void volk_16s_convert_32f_unaligned16_generic(float* outputVector, -#endif /* INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_16i_s32f_convert_32f_u_H */ diff --git a/volk/include/volk/volk_16s_quad_max_star_aligned16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h index 1004c4d23..e4ec5ab4e 100644 --- a/volk/include/volk/volk_16s_quad_max_star_aligned16.h +++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h @@ -1,5 +1,5 @@ 
-#ifndef INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H -#define INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H +#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H +#define INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H #include<inttypes.h> @@ -13,7 +13,7 @@ #include<emmintrin.h> -static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { @@ -96,9 +96,9 @@ static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* /*asm volatile ( - "volk_16s_quad_max_star_aligned16_sse2_L1:\n\t" + "volk_16i_x4_quad_max_star_16i_a16_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je volk_16s_quad_max_star_aligned16_sse2_END\n\t" + "je volk_16i_x4_quad_max_star_16i_a16_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" @@ -143,9 +143,9 @@ static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* "movaps %%xmm1, (%[target])\n\t" "addw $16, %[target]\n\t" - "jmp volk_16s_quad_max_star_aligned16_sse2_L1\n\t" + "jmp volk_16i_x4_quad_max_star_16i_a16_sse2_L1\n\t" - "volk_16s_quad_max_star_aligned16_sse2_END:\n\t" + "volk_16i_x4_quad_max_star_16i_a16_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target) : @@ -168,7 +168,7 @@ static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* #if LV_HAVE_GENERIC -static inline void volk_16s_quad_max_star_aligned16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { int i = 0; @@ -188,4 +188,4 @@ static inline void volk_16s_quad_max_star_aligned16_generic(short* target, 
short #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H*/ diff --git a/volk/include/volk/volk_16s_add_quad_aligned16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h index 63042bef1..5744ca3a6 100644 --- a/volk/include/volk/volk_16s_add_quad_aligned16.h +++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H -#define INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H +#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H +#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H #include<inttypes.h> @@ -13,7 +13,7 @@ #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4; __m128i *p_target0, *p_target1, *p_target2, *p_target3, *p_src0, *p_src1, *p_src2, *p_src3, *p_src4; @@ -65,9 +65,9 @@ static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ } /*asm volatile ( - ".%=volk_16s_add_quad_aligned16_sse2_L1:\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je .%=volk_16s_add_quad_aligned16_sse2_END\n\t" + "je .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" "movaps (%[src2]), %%xmm3\n\t" @@ -91,8 +91,8 @@ static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ "add $16, %[target1]\n\t" "add $16, %[target2]\n\t" "add $16, %[target3]\n\t" - "jmp .%=volk_16s_add_quad_aligned16_sse2_L1\n\t" - ".%=volk_16s_add_quad_aligned16_sse2_END:\n\t" + "jmp 
.%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3) :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5" @@ -113,7 +113,7 @@ static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ #if LV_HAVE_GENERIC -static inline void volk_16s_add_quad_aligned16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { int i = 0; @@ -133,4 +133,4 @@ static inline void volk_16s_add_quad_aligned16_generic(short* target0, short* ta -#endif /*INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H*/ diff --git a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h index 32e13df98..7e08bf182 100644 --- a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h +++ b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H +#define INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_16s_aligned16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void 
volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -61,7 +61,7 @@ static inline void volk_16sc_deinterleave_16s_aligned16_ssse3(int16_t* iBuffer, \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_16s_aligned16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -128,7 +128,7 @@ static inline void volk_16sc_deinterleave_16s_aligned16_sse2(int16_t* iBuffer, i \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Deinterleaves the complex 16 bit vector into I & Q vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param qBuffer The Q buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +extern void volk_16ic_deinterleave_16i_x2_a16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_16i_x2_a16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_16i_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h index b594c85b8..388c00592 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a16_H +#define INCLUDED_volk_16ic_deinterleave_real_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_16s_aligned16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -55,7 +55,7 @@ static inline void 
volk_16sc_deinterleave_real_16s_aligned16_ssse3(int16_t* iBuf \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_16s_aligned16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -103,7 +103,7 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_sse2(int16_t* iBuff \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -117,4 +117,4 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_generic(int16_t* iB -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h index c0d1e941a..55a25702e 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a16_H +#define INCLUDED_volk_16ic_deinterleave_real_8i_a16_H #include <inttypes.h> #include <stdio.h> 
@@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -53,7 +53,7 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffe number = sixteenthPoints * 16; int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr; for(; number < num_points; number++){ - *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ / 256)); + *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8)); int16ComplexVectorPtr++; } } @@ -66,18 +66,29 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffe \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; - const int16_t* complexVectorPtr = (int16_t*)complexVector; + int16_t* complexVectorPtr = (int16_t*)complexVector; int8_t* iBufferPtr = iBuffer; for(number = 0; number < num_points; number++){ - *iBufferPtr++ = (int8_t)(*complexVectorPtr++ / 256); + *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8)); complexVectorPtr++; } } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +extern void volk_16ic_deinterleave_real_8i_a16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_real_8i_a16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_real_8i_a16_orc_impl(iBuffer, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16ic_magnitude_16i_a16.h index 1482ab82e..bdcace750 100644 --- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_16ic_magnitude_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H -#define INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H +#define INCLUDED_volk_16ic_magnitude_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -84,7 +84,7 @@ static inline void volk_16sc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVect \param magnitudeVector The vector containing the real output values \param 
num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_16s_aligned16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -160,11 +160,11 @@ static inline void volk_16sc_magnitude_16s_aligned16_sse(int16_t* magnitudeVecto \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; - const float scalar = 32786.0; + const float scalar = 32768.0; for(number = 0; number < num_points; number++){ float real = ((float)(*complexVectorPtr++)) / scalar; float imag = ((float)(*complexVectorPtr++)) / scalar; @@ -173,7 +173,18 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC_DISABLED +/*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); +static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h index 86f67437d..606de2fc5 100644 --- a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H +#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* 
iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -77,7 +77,7 @@ static inline void volk_16sc_deinterleave_32f_aligned16_sse(float* iBuffer, floa \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! + \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param qBuffer The Q buffer output data + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex data values to be deinterleaved + */ +extern void volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H */ diff 
--git a/volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h index 3e7be1e64..62331e496 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H +#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -61,7 +61,7 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuff \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -107,7 +107,7 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to 
be deinterleaved */ -static inline void volk_16sc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; @@ -122,4 +122,4 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_generic(float* iBuf -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h index 9c2a48835..ae64efbeb 100644 --- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H -#define INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H +#define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -79,7 +79,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector \param scalar The data value 
to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -148,7 +148,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -161,7 +161,19 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC_DISABLED +/*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_16ic_s32f_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_magnitude_32f_a16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H */ diff --git a/volk/include/volk/volk_16u_byteswap_aligned16.h b/volk/include/volk/volk_16u_byteswap_a16.h index 698e958e4..c8128dbab 100644 --- a/volk/include/volk/volk_16u_byteswap_aligned16.h +++ b/volk/include/volk/volk_16u_byteswap_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H -#define INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H +#ifndef INCLUDED_volk_16u_byteswap_a16_H +#define INCLUDED_volk_16u_byteswap_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_aligned16_sse2(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint16_t* inputPtr = intsToSwap; __m128i input, left, right, output; @@ -49,7 +49,7 @@ static inline void volk_16u_byteswap_aligned16_sse2(uint16_t* intsToSwap, 
unsign \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned int num_points){ unsigned int point; uint16_t* inputPtr = intsToSwap; for(point = 0; point < num_points; point++){ @@ -61,5 +61,17 @@ static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, uns } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Byteswaps (in-place) an aligned vector of int16_t's. + \param intsToSwap The vector of data to byte swap + \param numDataPoints The number of data points +*/ +extern void volk_16u_byteswap_a16_orc_impl(uint16_t* intsToSwap, unsigned int num_points); +static inline void volk_16u_byteswap_a16_orc(uint16_t* intsToSwap, unsigned int num_points){ + volk_16u_byteswap_a16_orc_impl(intsToSwap, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H */ +#endif /* INCLUDED_volk_16u_byteswap_a16_H */ diff --git a/volk/include/volk/volk_32f_accumulator_aligned16.h b/volk/include/volk/volk_32f_accumulator_s32f_a16.h index 7e395cf50..4a3588e6d 100644 --- a/volk/include/volk/volk_32f_accumulator_aligned16.h +++ b/volk/include/volk/volk_32f_accumulator_s32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H -#define INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H +#ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H +#define INCLUDED_volk_32f_accumulator_s32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_aligned16_sse(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* inputBuffer, unsigned 
int num_points){ float returnValue = 0; unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -49,7 +49,7 @@ static inline void volk_32f_accumulator_aligned16_sse(float* result, const float \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_aligned16_generic(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const float* inputBuffer, unsigned int num_points){ const float* aPtr = inputBuffer; unsigned int number = 0; float returnValue = 0; @@ -64,4 +64,4 @@ static inline void volk_32f_accumulator_aligned16_generic(float* result, const f -#endif /* INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_accumulator_s32f_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_aligned16.h b/volk/include/volk/volk_32f_convert_64f_a16.h index 91a855813..c303dc118 100644 --- a/volk/include/volk/volk_32f_convert_64f_aligned16.h +++ b/volk/include/volk/volk_32f_convert_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H +#ifndef INCLUDED_volk_32f_convert_64f_a16_H +#define INCLUDED_volk_32f_convert_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, con \param fVector The float 
vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const float* inputVector, unsigned int num_points){ double* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_convert_64f_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_unaligned16.h b/volk/include/volk/volk_32f_convert_64f_u.h index 698e0d446..a825767de 100644 --- a/volk/include/volk/volk_32f_convert_64f_unaligned16.h +++ b/volk/include/volk/volk_32f_convert_64f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_convert_64f_u_H +#define INCLUDED_volk_32f_convert_64f_u_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_unaligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_unaligned16_sse2(double* outputVector, c \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_unaligned16_generic(double* outputVector, const float* 
inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_u_generic(double* outputVector, const float* inputVector, unsigned int num_points){ double* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_unaligned16_generic(double* outputVector -#endif /* INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32f_convert_64f_u_H */ diff --git a/volk/include/volk/volk_32f_index_max_aligned16.h b/volk/include/volk/volk_32f_index_max_16u_a16.h index 26322bfa2..d070e17d5 100644 --- a/volk/include/volk/volk_32f_index_max_aligned16.h +++ b/volk/include/volk/volk_32f_index_max_16u_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H -#define INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_32f_index_max_16u_a16_H +#define INCLUDED_volk_32f_index_max_16u_a16_H #include <volk/volk_common.h> #include <inttypes.h> @@ -8,7 +8,7 @@ #if LV_HAVE_SSE4_1 #include<smmintrin.h> -static inline void volk_32f_index_max_aligned16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -66,7 +66,7 @@ static inline void volk_32f_index_max_aligned16_sse4_1(unsigned int* target, con #if LV_HAVE_SSE #include<xmmintrin.h> -static inline void volk_32f_index_max_aligned16_sse(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -123,7 +123,7 @@ static inline void volk_32f_index_max_aligned16_sse(unsigned int* target, const #endif /*LV_HAVE_SSE*/ #if 
LV_HAVE_GENERIC -static inline void volk_32f_index_max_aligned16_generic(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ float max = src0[0]; unsigned int index = 0; @@ -145,4 +145,4 @@ static inline void volk_32f_index_max_aligned16_generic(unsigned int* target, co #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/ diff --git a/volk/include/volk/volk_32f_fm_detect_aligned16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h index c82239d74..ff4d5b19c 100644 --- a/volk/include/volk/volk_32f_fm_detect_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H -#define INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H +#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. \param num_noints The number of real values in the input vector. */ -static inline void volk_32f_fm_detect_aligned16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -87,7 +87,7 @@ static inline void volk_32f_fm_detect_aligned16_sse(float* outputVector, const f \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. \param num_points The number of real values in the input vector. 
*/ -static inline void volk_32f_fm_detect_aligned16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -117,4 +117,4 @@ static inline void volk_32f_fm_detect_aligned16_generic(float* outputVector, con -#endif /* INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h index ff917525f..168245d65 100644 --- a/volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H -#define INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H +#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -16,7 +16,7 @@ \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_calc_spectral_noise_floor_aligned16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -127,7 +127,7 @@ static 
inline void volk_32f_calc_spectral_noise_floor_aligned16_sse(float* noise \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_calc_spectral_noise_floor_aligned16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ float sumMean = 0.0; unsigned int number; // find the sum (for mean), etc @@ -164,4 +164,4 @@ static inline void volk_32f_calc_spectral_noise_floor_aligned16_generic(float* n -#endif /* INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_16s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h index 7fbabd9c3..d6b16e336 100644 --- a/volk/include/volk/volk_32f_convert_16s_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H +#define INCLUDED_volk_32f_s32f_convert_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_16s_aligned16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int 
num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_16s_aligned16_sse2(int16_t* outputVector, co \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_16s_aligned16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -93,7 +93,7 @@ static inline void volk_32f_convert_16s_aligned16_sse(int16_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_16s_aligned16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -107,4 +107,4 @@ static inline void volk_32f_convert_16s_aligned16_generic(int16_t* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_16i_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_16s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index d2bbdf13a..4d306e53c 100644 --- a/volk/include/volk/volk_32f_convert_16s_unaligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H 
+#define INCLUDED_volk_32f_s32f_convert_16i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_16s_unaligned16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -55,7 +55,7 @@ static inline void volk_32f_convert_16s_unaligned16_sse2(int16_t* outputVector, \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_16s_unaligned16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -96,7 +96,7 @@ static inline void volk_32f_convert_16s_unaligned16_sse(int16_t* outputVector, c \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_16s_unaligned16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -110,4 +110,4 @@ static inline void volk_32f_convert_16s_unaligned16_generic(int16_t* outputVecto -#endif /* INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H */ +#endif /* 
INCLUDED_volk_32f_s32f_convert_16i_u_H */ diff --git a/volk/include/volk/volk_32f_convert_32s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h index 011ef5d0e..ae874fd7b 100644 --- a/volk/include/volk/volk_32f_convert_32s_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H +#define INCLUDED_volk_32f_s32f_convert_32i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_32s_aligned16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -49,7 +49,7 @@ static inline void volk_32f_convert_32s_aligned16_sse2(int32_t* outputVector, co \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_32s_aligned16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -89,7 +89,7 @@ static inline void volk_32f_convert_32s_aligned16_sse(int32_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void 
volk_32f_convert_32s_aligned16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -103,4 +103,4 @@ static inline void volk_32f_convert_32s_aligned16_generic(int32_t* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_32i_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_32s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index a6df826c7..561fcd800 100644 --- a/volk/include/volk/volk_32f_convert_32s_unaligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H +#define INCLUDED_volk_32f_s32f_convert_32i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_32s_unaligned16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_convert_32s_unaligned16_sse2(int32_t* outputVector, \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_32s_unaligned16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int 
num_points){ +static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -92,7 +92,7 @@ static inline void volk_32f_convert_32s_unaligned16_sse(int32_t* outputVector, c \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_32s_unaligned16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -106,4 +106,4 @@ static inline void volk_32f_convert_32s_unaligned16_generic(int32_t* outputVecto -#endif /* INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */ diff --git a/volk/include/volk/volk_32f_convert_8s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h index b9487b622..f64f2a213 100644 --- a/volk/include/volk/volk_32f_convert_8s_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H +#define INCLUDED_volk_32f_s32f_convert_8i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_8s_aligned16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const float* 
inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -60,7 +60,7 @@ static inline void volk_32f_convert_8s_aligned16_sse2(int8_t* outputVector, cons \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_8s_aligned16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -100,13 +100,13 @@ static inline void volk_32f_convert_8s_aligned16_sse(int8_t* outputVector, const \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_8s_aligned16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ * scalar)); + *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ * scalar); } } #endif /* LV_HAVE_GENERIC */ @@ -114,4 +114,4 @@ static inline void volk_32f_convert_8s_aligned16_generic(int8_t* outputVector, c -#endif /* INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_8i_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_8s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index e986dbc87..420693571 100644 --- 
a/volk/include/volk/volk_32f_convert_8s_unaligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H +#define INCLUDED_volk_32f_s32f_convert_8i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_8s_unaligned16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -62,7 +62,7 @@ static inline void volk_32f_convert_8s_unaligned16_sse2(int8_t* outputVector, co \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_8s_unaligned16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -103,7 +103,7 @@ static inline void volk_32f_convert_8s_unaligned16_sse(int8_t* outputVector, con \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_8s_unaligned16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int8_t* 
outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -117,4 +117,4 @@ static inline void volk_32f_convert_8s_unaligned16_generic(int8_t* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */ diff --git a/volk/include/volk/volk_32f_normalize_aligned16.h b/volk/include/volk/volk_32f_s32f_normalize_a16.h index 1aabb1d9d..0850cddf7 100644 --- a/volk/include/volk/volk_32f_normalize_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_normalize_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H -#define INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_normalize_a16_H +#define INCLUDED_volk_32f_s32f_normalize_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param num_points The number of values in vecBuffer \param scalar The scale value to be applied to each buffer value */ -static inline void volk_32f_normalize_aligned16_sse(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; @@ -49,7 +49,7 @@ static inline void volk_32f_normalize_aligned16_sse(float* vecBuffer, const floa \param bVector One of the vectors to be normalizeed \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector */ -static inline void volk_32f_normalize_aligned16_generic(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; const float invScalar = 1.0 / scalar; @@ -60,7 +60,22 @@ static inline void volk_32f_normalize_aligned16_generic(float* vecBuffer, const } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Normalizes the input buffer in place by applying the scalar to each value + \param vecBuffer The buffer of values to be normalized + \param scalar The scale value to be applied to each buffer value + \param num_points The number of values in vecBuffer + \note vecBuffer is passed to the ORC implementation as both source and destination +*/ +extern void volk_32f_s32f_normalize_a16_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points); +static inline void volk_32f_s32f_normalize_a16_orc(float* vecBuffer, const float scalar, unsigned int num_points){ + float invscalar = 1.0 / scalar; + volk_32f_s32f_normalize_a16_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_normalize_a16_H */ diff --git a/volk/include/volk/volk_32f_power_aligned16.h b/volk/include/volk/volk_32f_s32f_power_32f_a16.h index 2ecd8eecb..3ed594d9a 100644 --- a/volk/include/volk/volk_32f_power_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_power_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_POWER_ALIGNED16_H -#define INCLUDED_VOLK_32f_POWER_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H +#define INCLUDED_volk_32f_s32f_power_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -19,7 +19,7 @@ \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_power_aligned16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -76,7 +76,7 @@ static inline void volk_32f_power_aligned16_sse4_1(float* cVector, const 
float* \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_power_aligned16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -127,7 +127,7 @@ static inline void volk_32f_power_aligned16_sse(float* cVector, const float* aVe \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_power_aligned16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -141,4 +141,4 @@ static inline void volk_32f_power_aligned16_generic(float* cVector, const float* -#endif /* INCLUDED_VOLK_32f_POWER_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_stddev_aligned16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h index 1c6a08437..32f4fa067 100644 --- a/volk/include/volk/volk_32f_stddev_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H -#define INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H +#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the 
stddev calculation */ -static inline void volk_32f_stddev_aligned16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -74,7 +74,7 @@ static inline void volk_32f_stddev_aligned16_sse4_1(float* stddev, const float* \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_stddev_aligned16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -119,7 +119,7 @@ static inline void volk_32f_stddev_aligned16_sse(float* stddev, const float* inp \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_stddev_aligned16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ const float* aPtr = inputBuffer; @@ -141,4 +141,4 @@ static inline void volk_32f_stddev_aligned16_generic(float* stddev, const float* -#endif /* INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_sqrt_aligned16.h b/volk/include/volk/volk_32f_sqrt_32f_a16.h index 0b2eaf251..513c2cffe 100644 --- a/volk/include/volk/volk_32f_sqrt_aligned16.h +++ b/volk/include/volk/volk_32f_sqrt_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef 
INCLUDED_VOLK_32f_SQRT_ALIGNED16_H -#define INCLUDED_VOLK_32f_SQRT_ALIGNED16_H +#ifndef INCLUDED_volk_32f_sqrt_32f_a16_H +#define INCLUDED_volk_32f_sqrt_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_aligned16_sse(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -47,7 +47,7 @@ static inline void volk_32f_sqrt_aligned16_sse(float* cVector, const float* aVec \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -58,7 +58,20 @@ static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int); +/*! 
+ \brief Takes the square root of each value in the input vector and stores the results in the output vector + \param cVector The vector where the results will be stored + \param aVector The vector of values to take the square root of + \param num_points The number of values in aVector to be square-rooted and stored into cVector +*/ +static inline void volk_32f_sqrt_32f_a16_orc(float* cVector, const float* aVector, unsigned int num_points){ + volk_32f_sqrt_32f_a16_orc_impl(cVector, aVector, num_points); +} + +#endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_VOLK_32f_SQRT_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_sqrt_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h index 1cd502257..278089841 100644 --- a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h +++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H -#define INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H +#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H +#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -87,7 +87,7 @@ static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, floa \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static 
inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -142,7 +142,7 @@ static inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float* \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -166,4 +166,4 @@ static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, flo -#endif /* INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */ diff --git a/volk/include/volk/volk_32f_add_aligned16.h b/volk/include/volk/volk_32f_x2_add_32f_a16.h index 721c60fd6..d0d0e0a0e 100644 --- a/volk/include/volk/volk_32f_add_aligned16.h +++ b/volk/include/volk/volk_32f_x2_add_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_ADD_ALIGNED16_H -#define INCLUDED_VOLK_32f_ADD_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_add_32f_a16_H +#define INCLUDED_volk_32f_x2_add_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_add_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void 
volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_add_aligned16_sse(float* cVector, const float* aVect \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_add_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32f_add_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +extern void volk_32f_x2_add_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_add_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_add_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32f_ADD_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_add_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_divide_aligned16.h b/volk/include/volk/volk_32f_x2_divide_32f_a16.h index c00700cd8..d844e25b0 100644 --- a/volk/include/volk/volk_32f_divide_aligned16.h +++ 
b/volk/include/volk/volk_32f_x2_divide_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H -#define INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_divide_32f_a16_H +#define INCLUDED_volk_32f_x2_divide_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aV \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_divide_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,20 @@ static inline void volk_32f_divide_aligned16_generic(float* cVector, const float } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Divides the two input vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector The vector to be divided + \param bVector The divisor vector + \param num_points The number of values in aVector and bVector to be divided and stored into cVector +*/ +extern void volk_32f_x2_divide_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_divide_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_divide_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_divide_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_dot_prod_aligned16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h index 3aee1136a..61aa56815 100644 --- a/volk/include/volk/volk_32f_dot_prod_aligned16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H -#define INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H +#define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H #include<stdio.h> @@ -7,7 +7,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32f_dot_prod_aligned16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { float dotProduct = 0; const float* aPtr = input; @@ -27,7 +27,7 @@ static inline void volk_32f_dot_prod_aligned16_generic(float * result, const flo #if LV_HAVE_SSE -static inline void volk_32f_dot_prod_aligned16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const 
float* input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -77,7 +77,7 @@ static inline void volk_32f_dot_prod_aligned16_sse( float* result, const float* #include <pmmintrin.h> -static inline void volk_32f_dot_prod_aligned16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -124,7 +124,7 @@ static inline void volk_32f_dot_prod_aligned16_sse3(float * result, const float #include <smmintrin.h> -static inline void volk_32f_dot_prod_aligned16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -181,4 +181,4 @@ static inline void volk_32f_dot_prod_aligned16_sse4_1(float * result, const floa #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a16_H*/ diff --git a/volk/include/volk/volk_32f_dot_prod_unaligned16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h index bce6aa15f..8469a3cea 100644 --- a/volk/include/volk/volk_32f_dot_prod_unaligned16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H -#define INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_u_H +#define INCLUDED_volk_32f_x2_dot_prod_32f_u_H #include<stdio.h> @@ -7,7 +7,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32f_dot_prod_unaligned16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void 
volk_32f_x2_dot_prod_32f_u_generic(float * result, const float * input, const float * taps, unsigned int num_points) { float dotProduct = 0; const float* aPtr = input; @@ -27,7 +27,7 @@ static inline void volk_32f_dot_prod_unaligned16_generic(float * result, const f #if LV_HAVE_SSE -static inline void volk_32f_dot_prod_unaligned16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -77,7 +77,7 @@ static inline void volk_32f_dot_prod_unaligned16_sse( float* result, const floa #include <pmmintrin.h> -static inline void volk_32f_dot_prod_unaligned16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -124,7 +124,7 @@ static inline void volk_32f_dot_prod_unaligned16_sse3(float * result, const floa #include <smmintrin.h> -static inline void volk_32f_dot_prod_unaligned16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -181,4 +181,4 @@ static inline void volk_32f_dot_prod_unaligned16_sse4_1(float * result, const fl #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_u_H*/ diff --git a/volk/include/volk/volk_32f_interleave_32fc_aligned16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h index 859c6a0ef..29c9392df 100644 --- 
a/volk/include/volk/volk_32f_interleave_32fc_aligned16.h +++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H -#define INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a16_H +#define INCLUDED_volk_32f_x2_interleave_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_32fc_aligned16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ unsigned int number = 0; float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; @@ -56,7 +56,7 @@ static inline void volk_32f_interleave_32fc_aligned16_sse(lv_32fc_t* complexVect \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_32fc_aligned16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32f_interleave_32fc_aligned16_generic(lv_32fc_t* complex -#endif /* INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a16_H */ diff --git a/volk/include/volk/volk_32f_max_aligned16.h b/volk/include/volk/volk_32f_x2_max_32f_a16.h index 96aafb2bf..26e7f1246 100644 --- a/volk/include/volk/volk_32f_max_aligned16.h +++ 
b/volk/include/volk/volk_32f_x2_max_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_MAX_ALIGNED16_H -#define INCLUDED_VOLK_32f_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H +#define INCLUDED_volk_32f_x2_max_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVect \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_max_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The vector to be checked + \param bVector The vector to be checked + \param num_points The number of values in aVector and bVector to be checked and stored into cVector +*/ +extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_min_aligned16.h b/volk/include/volk/volk_32f_x2_min_32f_a16.h index e247f4213..23bae044c 100644 --- a/volk/include/volk/volk_32f_min_aligned16.h +++ b/volk/include/volk/volk_32f_x2_min_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_MIN_ALIGNED16_H -#define INCLUDED_VOLK_32f_MIN_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_min_32f_a16_H +#define INCLUDED_volk_32f_x2_min_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_min_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_min_aligned16_sse(float* cVector, const float* aVect \param bVector The vector to be checked \param num_points The number of values in aVector and 
bVector to be checked and stored into cVector */ -static inline void volk_32f_min_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The vector to be checked + \param bVector The vector to be checked + \param num_points The number of values in aVector and bVector to be checked and stored into cVector +*/ +extern void volk_32f_x2_min_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_min_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_min_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_min_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_multiply_aligned16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h index b557580ab..a0dcfa86e 100644 --- a/volk/include/volk/volk_32f_multiply_aligned16.h +++ b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H -#define INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_multiply_32f_a16_H +#define INCLUDED_volk_32f_x2_multiply_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of 
values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_multiply_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_multiply_aligned16_sse(float* cVector, const float* \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_multiply_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32f_multiply_aligned16_generic(float* cVector, const flo } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Multiplies the two input vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +extern void volk_32f_x2_multiply_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_multiply_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_multiply_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_multiply_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h index 476946b88..30306774d 100644 --- a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h +++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H -#define INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H +#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = 
iBuffer; const float* qBufferPtr = qBuffer; @@ -72,7 +72,7 @@ static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVec \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -136,7 +136,7 @@ static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVect \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ int16_t* complexVectorPtr = (int16_t*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -152,4 +152,4 @@ static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complex -#endif /* INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */ diff --git a/volk/include/volk/volk_32f_subtract_aligned16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h index ac3f5e5d1..7404bfe79 100644 --- a/volk/include/volk/volk_32f_subtract_aligned16.h +++ b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef 
INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H -#define INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_subtract_32f_a16_H +#define INCLUDED_volk_32f_x2_subtract_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_subtract_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_subtract_aligned16_sse(float* cVector, const float* \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_subtract_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,5 +63,19 @@ static inline void volk_32f_subtract_aligned16_generic(float* cVector, const flo } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Subtracts bVector from aVector and stores the results in cVector + \param cVector The vector where the results will be stored + \param aVector The initial vector + \param bVector The vector to be subtracted + \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector +*/ +extern void volk_32f_x2_subtract_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_subtract_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_subtract_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_subtract_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_sum_of_poly_aligned16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h index a326e62b1..af9e39537 100644 --- a/volk/include/volk/volk_32f_sum_of_poly_aligned16.h +++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H -#define INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H +#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H #include<inttypes.h> #include<stdio.h> @@ -13,7 +13,7 @@ #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32f_sum_of_poly_aligned16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { float result = 0.0; @@ -100,7 +100,7 @@ static inline void volk_32f_sum_of_poly_aligned16_sse3(float* target, float* src #if LV_HAVE_GENERIC -static inline void volk_32f_sum_of_poly_aligned16_generic(float* target, float* src0, float* center_point_array, float* cutoff, 
unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { @@ -148,4 +148,4 @@ static inline void volk_32f_sum_of_poly_aligned16_generic(float* target, float* #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H*/ diff --git a/volk/include/volk/volk_32fc_32f_multiply_aligned16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h index 436656ca0..514998800 100644 --- a/volk/include/volk/volk_32fc_32f_multiply_aligned16.h +++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H -#define INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a16_H +#define INCLUDED_volk_32fc_32f_multiply_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vectors containing the float values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_aligned16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -64,7 +64,7 @@ static inline void volk_32fc_32f_multiply_aligned16_sse(lv_32fc_t* cVector, cons \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_aligned16_generic(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const float* bPtr= bVector; @@ -76,7 +76,20 @@ static inline void volk_32fc_32f_multiply_aligned16_generic(lv_32fc_t* cVector, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! + \brief Multiplies the input complex vector with the input float vector and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector The complex vector to be multiplied + \param bVector The vector containing the float values to be multiplied against each complex value in aVector + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector + */ +extern void volk_32fc_32f_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32fc_32f_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ + volk_32fc_32f_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h index 02085cd1e..84d2576ed 100644 --- a/volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H +#define INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H #include 
<inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -57,7 +57,7 @@ static inline void volk_32fc_deinterleave_32f_aligned16_sse(float* iBuffer, floa \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32fc_deinterleave_32f_aligned16_generic(float* iBuffer, -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h index 3d9ebccdd..34262a7af 100644 --- a/volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H +#define INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ 
-13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_aligned16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -59,7 +59,7 @@ static inline void volk_32fc_deinterleave_64f_aligned16_sse2(double* iBuffer, do \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_aligned16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -75,4 +75,4 @@ static inline void volk_32fc_deinterleave_64f_aligned16_generic(double* iBuffer, -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h index 2af973bcc..9838ec88b 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a16_H +#define INCLUDED_volk_32fc_deinterleave_real_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I 
buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_real_32f_aligned16_sse(float* iBuffer, \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; @@ -65,4 +65,4 @@ static inline void volk_32fc_deinterleave_real_32f_aligned16_generic(float* iBuf -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h index f408589c4..af392d074 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a16_H +#define INCLUDED_volk_32fc_deinterleave_real_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ 
-static inline void volk_32fc_deinterleave_real_64f_aligned16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_real_64f_aligned16_sse2(double* iBuffe \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_64f_aligned16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_32fc_deinterleave_real_64f_aligned16_generic(double* iBu -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a16_H */ diff --git a/volk/include/volk/volk_32fc_index_max_aligned16.h b/volk/include/volk/volk_32fc_index_max_16u_a16.h index d77a95f90..532ae4e7c 100644 --- a/volk/include/volk/volk_32fc_index_max_aligned16.h +++ b/volk/include/volk/volk_32fc_index_max_16u_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H -#define INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_index_max_16u_a16_H +#define INCLUDED_volk_32fc_index_max_16u_a16_H #include <volk/volk_common.h> #include<inttypes.h> @@ -11,7 +11,7 @@ #include<pmmintrin.h> -static inline void volk_32fc_index_max_aligned16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { @@ 
-189,7 +189,7 @@ static inline void volk_32fc_index_max_aligned16_sse3(unsigned int* target, lv_3 #endif /*LV_HAVE_SSE3*/ #if LV_HAVE_GENERIC -static inline void volk_32fc_index_max_aligned16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { float sq_dist = 0.0; float max = 0.0; unsigned int index = 0; @@ -212,4 +212,4 @@ static inline void volk_32fc_index_max_aligned16_generic(unsigned int* target, l #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_index_max_16u_a16_H*/ diff --git a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h b/volk/include/volk/volk_32fc_magnitude_32f_a16.h index 7a8fd1ef9..be7216dce 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H -#define INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_magnitude_32f_a16_H +#define INCLUDED_volk_32fc_magnitude_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_aligned16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -56,7 +56,7 @@ static inline void volk_32fc_magnitude_32f_aligned16_sse3(float* magnitudeVector \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated 
and stored into cVector */ -static inline void volk_32fc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -103,7 +103,7 @@ static inline void volk_32fc_magnitude_32f_aligned16_sse(float* magnitudeVector, \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -115,7 +115,18 @@ static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +extern void volk_32fc_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); +static inline void volk_32fc_magnitude_32f_a16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + volk_32fc_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_magnitude_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_atan2_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h index df0ebb987..e9f74438d 100644 --- a/volk/include/volk/volk_32fc_atan2_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H -#define INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a16_H +#define INCLUDED_volk_32fc_s32f_atan2_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -19,7 +19,7 @@ \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. 
*/ -static inline void volk_32fc_atan2_32f_aligned16_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -81,7 +81,7 @@ static inline void volk_32fc_atan2_32f_aligned16_sse4_1(float* outputVector, co \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. */ -static inline void volk_32fc_atan2_32f_aligned16_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -139,7 +139,7 @@ static inline void volk_32fc_atan2_32f_aligned16_sse(float* outputVector, const \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. 
*/ -static inline void volk_32fc_atan2_32f_aligned16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ float* outPtr = outputVector; const float* inPtr = (float*)inputVector; const float invNormalizeFactor = 1.0 / normalizeFactor; @@ -155,4 +155,4 @@ static inline void volk_32fc_atan2_32f_aligned16_generic(float* outputVector, co -#endif /* INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h index 3026b2422..31465bff9 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H +#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_16s_aligned16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -62,7 +62,7 @@ static inline void volk_32fc_deinterleave_real_16s_aligned16_sse(int16_t* iBuffe \param iBuffer The I buffer output data \param num_points The number of complex data 
values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* iBufferPtr = iBuffer; unsigned int number = 0; @@ -77,4 +77,4 @@ static inline void volk_32fc_deinterleave_real_16s_aligned16_generic(int16_t* iB -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h index 4e64d8c22..530359600 100644 --- a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H -#define INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H +#define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -69,7 +69,7 @@ static inline void volk_32fc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVect \param magnitudeVector The vector containing the real 
output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_16s_aligned16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -128,7 +128,7 @@ static inline void volk_32fc_magnitude_16s_aligned16_sse(int16_t* magnitudeVecto \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -140,7 +140,19 @@ static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param scalar The scale value multiplied to the magnitude of each complex vector + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_32fc_s32f_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_32fc_s32f_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ + volk_32fc_s32f_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H */ diff --git a/volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h index 2d71ee4f8..3507fdb3c 100644 --- a/volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H -#define INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a16_H +#define INCLUDED_volk_32fc_s32f_power_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -18,7 +18,7 @@ \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_32f_power_32fc_aligned16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -89,7 +89,7 @@ static inline void volk_32fc_32f_power_32fc_aligned16_sse(lv_32fc_t* cVector, co \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_32f_power_32fc_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; unsigned int number = 0; @@ -106,4 +106,4 @@ static inline void volk_32fc_32f_power_32fc_aligned16_generic(lv_32fc_t* cVector -#endif /* INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a16_H */ diff --git a/volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h index 645629b9d..39d8f7aa2 100644 --- a/volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H +#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -19,7 +19,7 @@ \param normalizationFactor This value is divided against all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_power_spectrum_32f_aligned16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void 
volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; @@ -96,7 +96,7 @@ static inline void volk_32fc_power_spectrum_32f_aligned16_sse3(float* logPowerOu \param normalizationFactor This value is divided agains all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_power_spectrum_32f_aligned16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -123,4 +123,4 @@ static inline void volk_32fc_power_spectrum_32f_aligned16_generic(float* logPowe -#endif /* INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h index 52ec0f95b..0120b5307 100644 --- a/volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H +#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -20,7 +20,7 @@ \param rbw The resolution bandwith of the fft spectrum \param 
num_points The number of fft data points */ -static inline void volk_32fc_power_spectral_density_32f_aligned16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; @@ -103,7 +103,7 @@ static inline void volk_32fc_power_spectral_density_32f_aligned16_sse3(float* lo \param rbw The resolution bandwith of the fft spectrum \param num_points The number of fft data points */ -static inline void volk_32fc_power_spectral_density_32f_aligned16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -131,4 +131,4 @@ static inline void volk_32fc_power_spectral_density_32f_aligned16_generic(float* -#endif /* INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h index 60103c1b5..a01971df3 100644 --- a/volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H -#define 
INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H +#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H #include<volk/volk_complex.h> #include<stdio.h> @@ -8,7 +8,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32fc_conjugate_dot_prod_aligned16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -62,7 +62,7 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_generic(lv_32fc_t* res #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_conjugate_dot_prod_aligned16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -203,7 +203,7 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_sse(lv_32fc_t* result, #endif #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_conjugate_dot_prod_aligned16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -341,4 +341,4 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_sse_32(lv_32fc_t* resu -#endif /*INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H*/ diff 
--git a/volk/include/volk/volk_32fc_dot_prod_aligned16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h index 1a834dc25..9a7b65ab4 100644 --- a/volk/include/volk/volk_32fc_dot_prod_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H +#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H #include <volk/volk_complex.h> #include <stdio.h> @@ -9,7 +9,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32fc_dot_prod_aligned16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -59,7 +59,7 @@ static inline void volk_32fc_dot_prod_aligned16_generic(lv_32fc_t* result, const #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_dot_prod_aligned16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm @@ -194,7 +194,7 @@ static inline void volk_32fc_dot_prod_aligned16_sse_64(lv_32fc_t* result, const #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_dot_prod_aligned16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm volatile ( @@ -320,7 +320,7 @@ static inline void volk_32fc_dot_prod_aligned16_sse_32(lv_32fc_t* result, const #include <pmmintrin.h> -static inline void volk_32fc_dot_prod_aligned16_sse3(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { lv_32fc_t dotProduct; @@ -377,8 +377,8 @@ static inline void volk_32fc_dot_prod_aligned16_sse3(lv_32fc_t* result, const lv #include <smmintrin.h> -static inline void volk_32fc_dot_prod_aligned16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - volk_32fc_dot_prod_aligned16_sse3(result, input, taps, num_bytes); +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { + volk_32fc_x2_dot_prod_32fc_a16_sse3(result, input, taps, num_bytes); // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now /* __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1; @@ -465,4 +465,4 @@ static inline void volk_32fc_dot_prod_aligned16_sse4_1(lv_32fc_t* result, const #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H*/ diff --git a/volk/include/volk/volk_32fc_multiply_aligned16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h index 6a1649fdb..b4214f5d2 100644 --- a/volk/include/volk/volk_32fc_multiply_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h @@ -1,9 +1,10 @@ -#ifndef INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H -#define INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a16_H +#define INCLUDED_volk_32fc_x2_multiply_32fc_a16_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> +#include <float.h> #if LV_HAVE_SSE3 #include <pmmintrin.h> @@ -14,7 +15,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into 
cVector */ -static inline void volk_32fc_multiply_aligned16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -60,7 +61,7 @@ static inline void volk_32fc_multiply_aligned16_sse3(lv_32fc_t* cVector, const l \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_multiply_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const lv_32fc_t* bPtr= bVector; @@ -72,7 +73,22 @@ static inline void volk_32fc_multiply_aligned16_generic(lv_32fc_t* cVector, cons } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +extern void volk_32fc_x2_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); +static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + volk_32fc_x2_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + + -#endif /* INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a16_H */ diff --git a/volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h index 0fcc86f1e..6a863b16d 100644 --- a/volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H -#define INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H +#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H #include<inttypes.h> #include<stdio.h> @@ -10,7 +10,7 @@ #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_square_dist_scalar_mult_aligned16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, 
xmm6, xmm7, xmm8; @@ -106,7 +106,7 @@ static inline void volk_32fc_square_dist_scalar_mult_aligned16_sse3(float* targe #endif /*LV_HAVE_SSE3*/ #if LV_HAVE_GENERIC -static inline void volk_32fc_square_dist_scalar_mult_aligned16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -123,4 +123,4 @@ static inline void volk_32fc_square_dist_scalar_mult_aligned16_generic(float* ta #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H*/ diff --git a/volk/include/volk/volk_32fc_square_dist_aligned16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h index 6458ea4dd..406097fc8 100644 --- a/volk/include/volk/volk_32fc_square_dist_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H -#define INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a16_H +#define INCLUDED_volk_32fc_x2_square_dist_32f_a16_H #include<inttypes.h> #include<stdio.h> @@ -9,7 +9,7 @@ #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_square_dist_aligned16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { +static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -92,7 +92,7 @@ static inline void volk_32fc_square_dist_aligned16_sse3(float* target, lv_32fc_t #endif /*LV_HAVE_SSE3*/ #if LV_HAVE_GENERIC -static inline void volk_32fc_square_dist_aligned16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { 
+static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -109,4 +109,4 @@ static inline void volk_32fc_square_dist_aligned16_generic(float* target, lv_32f #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a16_H*/ diff --git a/volk/include/volk/volk_32s_convert_32f_aligned16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h index a407e68bd..0fcadd9cb 100644 --- a/volk/include/volk/volk_32s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_32i_s32f_convert_32f_a16_H +#define INCLUDED_volk_32i_s32f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32s_convert_32f_aligned16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -55,7 +55,7 @@ static inline void volk_32s_convert_32f_aligned16_sse2(float* outputVector, cons \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32s_convert_32f_aligned16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int 
num_points){ float* outputVectorPtr = outputVector; const int32_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -70,4 +70,4 @@ static inline void volk_32s_convert_32f_aligned16_generic(float* outputVector, c -#endif /* INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32i_s32f_convert_32f_a16_H */ diff --git a/volk/include/volk/volk_32s_convert_32f_unaligned16.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h index ad7d4eb17..1dd6422f8 100644 --- a/volk/include/volk/volk_32s_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H +#define INCLUDED_volk_32i_s32f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -15,7 +15,7 @@ \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_32s_convert_32f_unaligned16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -57,7 +57,7 @@ static inline void volk_32s_convert_32f_unaligned16_sse2(float* outputVector, co \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_32s_convert_32f_unaligned16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_u_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int32_t* inputVectorPtr = inputVector; unsigned int number 
= 0; @@ -72,4 +72,4 @@ static inline void volk_32s_convert_32f_unaligned16_generic(float* outputVector, -#endif /* INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */ diff --git a/volk/include/volk/volk_32s_and_aligned16.h b/volk/include/volk/volk_32i_x2_and_32i_a16.h index e9f1e3a43..3baa1d856 100644 --- a/volk/include/volk/volk_32s_and_aligned16.h +++ b/volk/include/volk/volk_32i_x2_and_32i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_AND_ALIGNED16_H -#define INCLUDED_VOLK_32s_AND_ALIGNED16_H +#ifndef INCLUDED_volk_32i_x2_and_32i_a16_H +#define INCLUDED_volk_32i_x2_and_32i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32s_and_aligned16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32s_and_aligned16_sse(int32_t* cVector, const int32_t* a \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_ } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Ands the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors + \param bVector One of the vectors + \param num_points The number of values in aVector and bVector to be anded together and stored into cVector +*/ +extern void volk_32i_x2_and_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_and_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_and_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32s_AND_ALIGNED16_H */ +#endif /* INCLUDED_volk_32i_x2_and_32i_a16_H */ diff --git a/volk/include/volk/volk_32s_or_aligned16.h b/volk/include/volk/volk_32i_x2_or_32i_a16.h index f4c427c4d..0be22f00a 100644 --- a/volk/include/volk/volk_32s_or_aligned16.h +++ b/volk/include/volk/volk_32i_x2_or_32i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_OR_ALIGNED16_H -#define INCLUDED_VOLK_32s_OR_ALIGNED16_H +#ifndef INCLUDED_volk_32i_x2_or_32i_a16_H +#define INCLUDED_volk_32i_x2_or_32i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -static inline void volk_32s_or_aligned16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32s_or_aligned16_sse(int32_t* cVector, const int32_t* aV \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to 
be ored together and stored into cVector */ -static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Ors the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be ored + \param bVector One of the vectors to be ored + \param num_points The number of values in aVector and bVector to be ored together and stored into cVector +*/ +extern void volk_32i_x2_or_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_or_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_or_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32s_OR_ALIGNED16_H */ +#endif /* INCLUDED_volk_32i_x2_or_32i_a16_H */ diff --git a/volk/include/volk/volk_32u_byteswap_aligned16.h b/volk/include/volk/volk_32u_byteswap_a16.h index 09173a9d5..7556ec7b1 100644 --- a/volk/include/volk/volk_32u_byteswap_aligned16.h +++ b/volk/include/volk/volk_32u_byteswap_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H -#define INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H +#ifndef INCLUDED_volk_32u_byteswap_a16_H +#define INCLUDED_volk_32u_byteswap_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static 
inline void volk_32u_byteswap_aligned16_sse2(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint32_t* inputPtr = intsToSwap; @@ -57,7 +57,7 @@ static inline void volk_32u_byteswap_aligned16_sse2(uint32_t* intsToSwap, unsign \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_32u_byteswap_aligned16_generic(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = intsToSwap; unsigned int point; @@ -74,4 +74,4 @@ static inline void volk_32u_byteswap_aligned16_generic(uint32_t* intsToSwap, uns -#endif /* INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H */ +#endif /* INCLUDED_volk_32u_byteswap_a16_H */ diff --git a/volk/include/volk/volk_32u_popcnt_aligned16.h b/volk/include/volk/volk_32u_popcnt_a16.h index 37cfd112c..f6e25e4e8 100644 --- a/volk/include/volk/volk_32u_popcnt_aligned16.h +++ b/volk/include/volk/volk_32u_popcnt_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H -#define INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H +#ifndef INCLUDED_VOLK_32u_POPCNT_A16_H +#define INCLUDED_VOLK_32u_POPCNT_A16_H #include <stdio.h> #include <inttypes.h> @@ -7,7 +7,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) { // This is faster than a lookup table uint32_t retVal = value; @@ -27,10 +27,10 @@ static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32 #include <nmmintrin.h> -static inline void volk_32u_popcnt_aligned16_sse4_2(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a16_sse4_2(uint32_t* ret, const uint32_t value) { *ret = _mm_popcnt_u32(value); } #endif /*LV_HAVE_SSE4_2*/ 
-#endif /*INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H*/ +#endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/ diff --git a/volk/include/volk/volk_64f_convert_32f_aligned16.h b/volk/include/volk/volk_64f_convert_32f_a16.h index 44df66104..7dca065f0 100644 --- a/volk/include/volk/volk_64f_convert_32f_aligned16.h +++ b/volk/include/volk/volk_64f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_64f_convert_32f_a16_H +#define INCLUDED_volk_64f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_aligned16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_aligned16_sse2(float* outputVector, cons \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_aligned16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ float* outputVectorPtr = outputVector; const double* inputVectorPtr = inputVector; unsigned int number = 0; @@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_aligned16_generic(float* outputVector, c -#endif /* INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_64f_convert_32f_a16_H */ diff --git a/volk/include/volk/volk_64f_convert_32f_unaligned16.h b/volk/include/volk/volk_64f_convert_32f_u.h index 
08cfb6127..6338c1433 100644 --- a/volk/include/volk/volk_64f_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_64f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_64f_convert_32f_u_H +#define INCLUDED_volk_64f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_unaligned16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_unaligned16_sse2(float* outputVector, co \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_unaligned16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_u_generic(float* outputVector, const double* inputVector, unsigned int num_points){ float* outputVectorPtr = outputVector; const double* inputVectorPtr = inputVector; unsigned int number = 0; @@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_unaligned16_generic(float* outputVector, -#endif /* INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_64f_convert_32f_u_H */ diff --git a/volk/include/volk/volk_64f_max_aligned16.h b/volk/include/volk/volk_64f_x2_max_64f_a16.h index ce4907a8c..4b0c1f5f1 100644 --- a/volk/include/volk/volk_64f_max_aligned16.h +++ b/volk/include/volk/volk_64f_x2_max_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_MAX_ALIGNED16_H -#define 
INCLUDED_VOLK_64f_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_64f_x2_max_64f_a16_H +#define INCLUDED_volk_64f_x2_max_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_max_aligned16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -53,7 +53,7 @@ static inline void volk_64f_max_aligned16_sse2(double* cVector, const double* aV \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_max_aligned16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_max_aligned16_generic(double* cVector, const double* #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_VOLK_64f_MAX_ALIGNED16_H */ +#endif /* INCLUDED_volk_64f_x2_max_64f_a16_H */ diff --git a/volk/include/volk/volk_64f_min_aligned16.h b/volk/include/volk/volk_64f_x2_min_64f_a16.h index acf4d6b2a..aa961e384 100644 --- a/volk/include/volk/volk_64f_min_aligned16.h +++ b/volk/include/volk/volk_64f_x2_min_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_MIN_ALIGNED16_H -#define INCLUDED_VOLK_64f_MIN_ALIGNED16_H +#ifndef INCLUDED_volk_64f_x2_min_64f_a16_H +#define INCLUDED_volk_64f_x2_min_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param 
bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_min_aligned16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -53,7 +53,7 @@ static inline void volk_64f_min_aligned16_sse2(double* cVector, const double* aV \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_min_aligned16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_min_aligned16_generic(double* cVector, const double* #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_VOLK_64f_MIN_ALIGNED16_H */ +#endif /* INCLUDED_volk_64f_x2_min_64f_a16_H */ diff --git a/volk/include/volk/volk_64u_byteswap_aligned16.h b/volk/include/volk/volk_64u_byteswap_a16.h index d5e1b6f30..0eefe0138 100644 --- a/volk/include/volk/volk_64u_byteswap_aligned16.h +++ b/volk/include/volk/volk_64u_byteswap_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H -#define INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H +#ifndef INCLUDED_volk_64u_byteswap_a16_H +#define INCLUDED_volk_64u_byteswap_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_aligned16_sse2(uint64_t* intsToSwap, unsigned int 
num_points){ +static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; __m128i input, byte1, byte2, byte3, byte4, output; __m128i byte2mask = _mm_set1_epi32(0x00FF0000); @@ -65,7 +65,7 @@ static inline void volk_64u_byteswap_aligned16_sse2(uint64_t* intsToSwap, unsign \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_aligned16_generic(uint64_t* intsToSwap, unsigned int num_points){ +static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; unsigned int point; for(point = 0; point < num_points; point++){ @@ -85,4 +85,4 @@ static inline void volk_64u_byteswap_aligned16_generic(uint64_t* intsToSwap, uns -#endif /* INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H */ +#endif /* INCLUDED_volk_64u_byteswap_a16_H */ diff --git a/volk/include/volk/volk_64u_popcnt_aligned16.h b/volk/include/volk/volk_64u_popcnt_a16.h index 4d62f9375..59511dc29 100644 --- a/volk/include/volk/volk_64u_popcnt_aligned16.h +++ b/volk/include/volk/volk_64u_popcnt_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H -#define INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H +#ifndef INCLUDED_volk_64u_popcnt_a16_H +#define INCLUDED_volk_64u_popcnt_a16_H #include <stdio.h> #include <inttypes.h> @@ -8,7 +8,7 @@ #if LV_HAVE_GENERIC -static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) { const uint32_t* valueVector = (const uint32_t*)&value; @@ -40,11 +40,11 @@ static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64 #include <nmmintrin.h> -static inline void volk_64u_popcnt_aligned16_sse4_2(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a16_sse4_2(uint64_t* ret, const uint64_t value) { *ret 
= _mm_popcnt_u64(value); } #endif /*LV_HAVE_SSE4_2*/ -#endif /*INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H*/ +#endif /*INCLUDED_volk_64u_popcnt_a16_H*/ diff --git a/volk/include/volk/volk_8s_convert_16s_aligned16.h b/volk/include/volk/volk_8i_convert_16i_a16.h index 0efe3c6a1..3d7045753 100644 --- a/volk/include/volk/volk_8s_convert_16s_aligned16.h +++ b/volk/include/volk/volk_8i_convert_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED16_H +#ifndef INCLUDED_volk_8i_convert_16i_a16_H +#define INCLUDED_volk_8i_convert_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_16s_aligned16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -54,7 +54,7 @@ static inline void volk_8s_convert_16s_aligned16_sse4_1(int16_t* outputVector, c \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -65,6 +65,18 @@ static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Converts the input 8 bit integer data into 16 bit integer data + \param inputVector The 8 bit input data buffer + \param outputVector The 16 bit output data buffer + \param num_points The number of data values to be converted + */ +extern void volk_8i_convert_16i_a16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); +static inline void volk_8i_convert_16i_a16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ + volk_8i_convert_16i_a16_orc_impl(outputVector, inputVector, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8s_convert_16s_unaligned16.h b/volk/include/volk/volk_8i_convert_16i_u.h index 05b916cea..bcff13406 100644 --- a/volk/include/volk/volk_8s_convert_16s_unaligned16.h +++ b/volk/include/volk/volk_8i_convert_16i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED16_H +#ifndef INCLUDED_volk_8i_convert_16i_u_H +#define INCLUDED_volk_8i_convert_16i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_8s_convert_16s_unaligned16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -56,7 +56,7 @@ static inline void volk_8s_convert_16s_unaligned16_sse4_1(int16_t* outputVector, \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_8s_convert_16s_unaligned16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_u_generic(int16_t* 
outputVector, const int8_t* inputVector, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; diff --git a/volk/include/volk/volk_8s_convert_32f_aligned16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h index 54b66ef8f..99a24ec10 100644 --- a/volk/include/volk/volk_8s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_8i_s32f_convert_32f_a16_H +#define INCLUDED_volk_8i_s32f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_32f_aligned16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -74,7 +74,7 @@ static inline void volk_8s_convert_32f_aligned16_sse4_1(float* outputVector, con \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -86,6 +86,20 @@ static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, co } #endif /* LV_HAVE_GENERIC */ 
+#if LV_HAVE_ORC + /*! + \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 8 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + */ +extern void volk_8i_s32f_convert_32f_a16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); +static inline void volk_8i_s32f_convert_32f_a16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ + float invscalar = 1.0 / scalar; + volk_8i_s32f_convert_32f_a16_orc_impl(outputVector, inputVector, invscalar, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8s_convert_32f_unaligned16.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h index 8019aac9a..1e30957e8 100644 --- a/volk/include/volk/volk_8s_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H +#define INCLUDED_volk_8i_s32f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -15,7 +15,7 @@ \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_8s_convert_32f_unaligned16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -76,7 +76,7 @@ static inline void volk_8s_convert_32f_unaligned16_sse4_1(float* outputVector, c 
\param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_8s_convert_32f_unaligned16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_u_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; diff --git a/volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h index 38eaa49ea..91c9b2c58 100644 --- a/volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h +++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H +#define INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_16s_aligned16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -59,7 +59,7 @@ static inline void volk_8sc_deinterleave_16s_aligned16_sse4_1(int16_t* iBuffer, \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int 
num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -74,4 +74,4 @@ static inline void volk_8sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H */ diff --git a/volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h index d0cb49494..bf3dc20dd 100644 --- a/volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a16_H +#define INCLUDED_volk_8ic_deinterleave_real_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_16s_aligned16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -49,7 +49,7 @@ static inline void volk_8sc_deinterleave_real_16s_aligned16_sse4_1(int16_t* iBuf \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void 
volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_8sc_deinterleave_real_16s_aligned16_generic(int16_t* iBu -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a16_H */ diff --git a/volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h index d84d64568..13de79423 100644 --- a/volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -50,7 +50,7 @@ static inline void volk_8sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; diff --git a/volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h 
b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h index d0c118965..22c3ebb23 100644 --- a/volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H +#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_32f_aligned16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -84,7 +84,7 @@ static inline void volk_8sc_deinterleave_32f_aligned16_sse4_1(float* iBuffer, fl \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -145,7 +145,7 @@ static inline void volk_8sc_deinterleave_32f_aligned16_sse(float* iBuffer, float \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void 
volk_8sc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -161,4 +161,4 @@ static inline void volk_8sc_deinterleave_32f_aligned16_generic(float* iBuffer, f -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */ diff --git a/volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h index c849448ea..5f1430394 100644 --- a/volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H +#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -70,7 +70,7 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffe \param scalar The scaling value being multiplied against each data point \param 
num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -115,7 +115,7 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; @@ -130,4 +130,4 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_generic(float* iBuff -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */ diff --git a/volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h index 470a67539..d9cacbf46 100644 --- a/volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h +++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H -#define INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H +#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param bVector 
The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_16sc_aligned16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -76,7 +76,7 @@ static inline void volk_8sc_multiply_conjugate_16sc_aligned16_sse4_1(lv_16sc_t* \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_16sc_aligned16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; int16_t* c16Ptr = (int16_t*)cVector; int8_t* a8Ptr = (int8_t*)aVector; @@ -99,4 +99,4 @@ static inline void volk_8sc_multiply_conjugate_16sc_aligned16_generic(lv_16sc_t* -#endif /* INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */ diff --git a/volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h index 52b444cf7..6ec923a4f 100644 --- a/volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h +++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H -#define 
INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H +#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_32fc_aligned16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -95,7 +95,7 @@ static inline void volk_8sc_multiply_conjugate_32fc_aligned16_sse4_1(lv_32fc_t* \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_32fc_aligned16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; float* cPtr = (float*)cVector; const float invScalar = 1.0 / scalar; @@ -119,4 +119,4 @@ static inline void volk_8sc_multiply_conjugate_32fc_aligned16_generic(lv_32fc_t* -#endif /* INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H */ diff --git a/volk/include/volk/volk_register.py 
b/volk/include/volk/volk_register.py index 9fded9a3e..bc8f959af 100755 --- a/volk/include/volk/volk_register.py +++ b/volk/include/volk/volk_register.py @@ -55,7 +55,7 @@ functions = []; for line in mfile: - subline = re.search(".*(aligned).*", line); + subline = re.search(".*_(a16|u)\.h.*", line); if subline: subsubline = re.search("(?<=volk_).*", subline.group(0)); if subsubline: @@ -70,11 +70,10 @@ datatypes = set(datatypes); for line in mfile: for dt in datatypes: if dt in line: - subline = re.search("(volk_" + dt +"_.*(aligned).*\.h)", line); + subline = re.search("(volk_" + dt +"_.*(a16|u).*\.h)", line); if subline: subsubline = re.search(".+(?=\.h)", subline.group(0)); - functions.append(subsubline.group(0)); archs = []; |