diff options
Diffstat (limited to 'volk')
-rw-r--r-- | volk/CMakeLists.txt | 88 | ||||
-rw-r--r-- | volk/Makefile.am | 6 | ||||
-rw-r--r-- | volk/Makefile.common | 17 | ||||
-rw-r--r-- | volk/apps/CMakeLists.txt | 38 | ||||
-rw-r--r-- | volk/apps/volk_profile.cc | 135 | ||||
-rwxr-xr-x | volk/bootstrap | 10 | ||||
-rw-r--r-- | volk/config/Makefile.am | 3 | ||||
-rw-r--r-- | volk/config/cppunit.m4 | 80 | ||||
-rw-r--r-- | volk/config/lf_warnings.m4 | 6 | ||||
-rw-r--r--[-rwxr-xr-x] | volk/config/lv_configure.m4 | 8 | ||||
-rw-r--r-- | volk/configure.ac | 1 | ||||
-rw-r--r-- | volk/gen/.gitignore | 3 | ||||
-rw-r--r-- | volk/gen/archs.xml (renamed from volk/include/volk/archs.xml) | 31 | ||||
-rw-r--r-- | volk/gen/machines.xml | 64 | ||||
-rw-r--r-- | volk/gen/make_c.py | 88 | ||||
-rw-r--r-- | volk/gen/make_config_fixed.py (renamed from volk/include/volk/make_config_fixed.py) | 0 | ||||
-rw-r--r-- | volk/gen/make_config_in.py (renamed from volk/include/volk/make_config_in.py) | 0 | ||||
-rw-r--r-- | volk/gen/make_cpuid_c.py | 235 | ||||
-rw-r--r-- | volk/gen/make_cpuid_h.py (renamed from volk/include/volk/make_cpuid_h.py) | 6 | ||||
-rw-r--r-- | volk/gen/make_each_machine_c.py | 86 | ||||
-rw-r--r-- | volk/gen/make_environment_init_c.py (renamed from volk/include/volk/make_environment_init_c.py) | 5 | ||||
-rw-r--r-- | volk/gen/make_environment_init_h.py (renamed from volk/include/volk/make_environment_init_h.py) | 6 | ||||
-rw-r--r-- | volk/gen/make_h.py | 38 | ||||
-rw-r--r-- | volk/gen/make_machines_c.py | 41 | ||||
-rw-r--r-- | volk/gen/make_machines_h.py | 59 | ||||
-rw-r--r-- | volk/gen/make_makefile_am.py | 123 | ||||
-rw-r--r-- | volk/gen/make_proccpu_sim.py (renamed from volk/include/volk/make_proccpu_sim.py) | 0 | ||||
-rw-r--r-- | volk/gen/make_set_simd.py | 166 | ||||
-rw-r--r-- | volk/gen/make_typedefs.py (renamed from volk/include/volk/make_typedefs.py) | 2 | ||||
-rw-r--r-- | volk/gen/volk_regexp.py (renamed from volk/include/volk/volk_regexp.py) | 6 | ||||
-rw-r--r--[-rwxr-xr-x] | volk/gen/volk_register.py (renamed from volk/include/volk/volk_register.py) | 192 | ||||
-rw-r--r-- | volk/include/volk/.gitignore | 18 | ||||
-rw-r--r-- | volk/include/volk/Makefile.am | 202 | ||||
-rw-r--r-- | volk/include/volk/emit_omnilog.py | 13 | ||||
-rw-r--r-- | volk/include/volk/make_c.py | 73 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_generic_c.py | 60 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_powerpc_c.py | 67 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_x86_c.py | 133 | ||||
-rw-r--r-- | volk/include/volk/make_h.py | 28 | ||||
-rw-r--r-- | volk/include/volk/make_init_c.py | 42 | ||||
-rw-r--r-- | volk/include/volk/make_init_h.py | 26 | ||||
-rw-r--r-- | volk/include/volk/make_mktables.py | 33 | ||||
-rw-r--r-- | volk/include/volk/make_registry.py | 62 | ||||
-rw-r--r-- | volk/include/volk/make_runtime.py | 34 | ||||
-rw-r--r-- | volk/include/volk/make_runtime_c.py | 47 | ||||
-rw-r--r-- | volk/include/volk/make_set_simd.py | 272 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_branch_4_state_8_a.h (renamed from volk/include/volk/volk_16i_branch_4_state_8_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_convert_8i_a.h (renamed from volk/include/volk/volk_16i_convert_8i_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_convert_8i_u.h | 2 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_max_star_16i_a.h (renamed from volk/include/volk/volk_16i_max_star_16i_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_max_star_horizontal_16i_a.h (renamed from volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_permute_and_scalar_add_a.h (renamed from volk/include/volk/volk_16i_permute_and_scalar_add_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_s32f_convert_32f_a.h (renamed from volk/include/volk/volk_16i_s32f_convert_32f_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_s32f_convert_32f_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h (renamed from volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h (renamed from volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h (renamed from volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_real_16i_a.h (renamed from volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_real_8i_a.h (renamed from volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_magnitude_16i_a.h (renamed from volk/include/volk/volk_16ic_magnitude_16i_a16.h) | 35 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h (renamed from volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h (renamed from volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h (renamed from volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h) | 31 | ||||
-rw-r--r-- | volk/include/volk/volk_16u_byteswap_a.h (renamed from volk/include/volk/volk_16u_byteswap_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_accumulator_s32f_a.h (renamed from volk/include/volk/volk_32f_accumulator_s32f_a16.h) | 17 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_convert_64f_a.h (renamed from volk/include/volk/volk_32f_convert_64f_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_convert_64f_u.h | 2 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_index_max_16u_a.h (renamed from volk/include/volk/volk_32f_index_max_16u_a16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_16i_a.h (renamed from volk/include/volk/volk_32f_s32f_convert_16i_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_16i_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_32i_a.h (renamed from volk/include/volk/volk_32f_s32f_convert_32i_a16.h) | 55 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_32i_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_8i_a.h (renamed from volk/include/volk/volk_32f_s32f_convert_8i_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_8i_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_normalize_a.h (renamed from volk/include/volk/volk_32f_s32f_normalize_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_power_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_power_32f_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_stddev_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_stddev_32f_a16.h) | 23 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_sqrt_32f_a.h (renamed from volk/include/volk/volk_32f_sqrt_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h (renamed from volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_add_32f_a.h (renamed from volk/include/volk/volk_32f_x2_add_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_divide_32f_a.h (renamed from volk/include/volk/volk_32f_x2_divide_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_dot_prod_32f_a.h (renamed from volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h) | 29 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_dot_prod_32f_u.h | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_interleave_32fc_a.h (renamed from volk/include/volk/volk_32f_x2_interleave_32fc_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_max_32f_a.h (renamed from volk/include/volk/volk_32f_x2_max_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_min_32f_a.h (renamed from volk/include/volk/volk_32f_x2_min_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_multiply_32f_a.h (renamed from volk/include/volk/volk_32f_x2_multiply_32f_a16.h) | 61 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h (renamed from volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_subtract_32f_a.h (renamed from volk/include/volk/volk_32f_x2_subtract_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h (renamed from volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_32f_multiply_32fc_a.h (renamed from volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_real_32f_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_real_64f_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_index_max_16u_a.h (renamed from volk/include/volk/volk_32fc_index_max_16u_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_magnitude_32f_a.h (renamed from volk/include/volk/volk_32fc_magnitude_32f_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_atan2_32f_a.h (renamed from volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h (renamed from volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h) | 17 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h (renamed from volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h) | 31 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_power_32fc_a.h (renamed from volk/include/volk/volk_32fc_s32f_power_32fc_a16.h) | 36 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h (renamed from volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h (renamed from volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h (renamed from volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h (renamed from volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_multiply_32fc_a.h (renamed from volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h (renamed from volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_square_dist_32f_a.h (renamed from volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_s32f_convert_32f_a.h (renamed from volk/include/volk/volk_32i_s32f_convert_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_s32f_convert_32f_u.h | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_x2_and_32i_a.h (renamed from volk/include/volk/volk_32i_x2_and_32i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_x2_or_32i_a.h (renamed from volk/include/volk/volk_32i_x2_or_32i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32u_byteswap_a.h (renamed from volk/include/volk/volk_32u_byteswap_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32u_popcnt_a.h (renamed from volk/include/volk/volk_32u_popcnt_a16.h) | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_convert_32f_a.h (renamed from volk/include/volk/volk_64f_convert_32f_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_convert_32f_u.h | 2 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_x2_max_64f_a.h (renamed from volk/include/volk/volk_64f_x2_max_64f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_x2_min_64f_a.h (renamed from volk/include/volk/volk_64f_x2_min_64f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_64u_byteswap_a.h (renamed from volk/include/volk/volk_64u_byteswap_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_64u_popcnt_a.h (renamed from volk/include/volk/volk_64u_popcnt_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_convert_16i_a.h (renamed from volk/include/volk/volk_8i_convert_16i_a16.h) | 20 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_convert_16i_u.h | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_s32f_convert_32f_a.h (renamed from volk/include/volk/volk_8i_s32f_convert_32f_a16.h) | 20 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_s32f_convert_32f_u.h | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h (renamed from volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_real_16i_a.h (renamed from volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_real_8i_a.h (renamed from volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h) | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h (renamed from volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h (renamed from volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h (renamed from volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h (renamed from volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_common.h | 92 | ||||
-rw-r--r-- | volk/include/volk/volk_complex.h | 79 | ||||
-rw-r--r-- | volk/include/volk/volk_prefs.h | 25 | ||||
-rw-r--r-- | volk/lib/.gitignore | 21 | ||||
-rw-r--r-- | volk/lib/CMakeLists.txt | 261 | ||||
-rw-r--r-- | volk/lib/Makefile.am | 158 | ||||
-rw-r--r-- | volk/lib/qa_16s_add_quad_aligned16.cc | 26 | ||||
-rw-r--r-- | volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 20 | ||||
-rw-r--r-- | volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc | 18 | ||||
-rw-r--r-- | volk/lib/qa_16s_quad_max_star_aligned16.cc | 12 | ||||
-rw-r--r-- | volk/lib/qa_32f_fm_detect_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_32u_popcnt_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_64u_popcnt_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 95 | ||||
-rw-r--r-- | volk/lib/qa_utils.h | 9 | ||||
-rw-r--r-- | volk/lib/testqa.cc | 183 | ||||
-rw-r--r-- | volk/lib/volk_prefs.c | 49 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 40 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.h | 4 | ||||
-rw-r--r-- | volk/msvc/inttypes.h | 301 | ||||
-rw-r--r-- | volk/msvc/stdint.h | 251 | ||||
-rw-r--r-- | volk/orc/Makefile.am | 44 | ||||
-rw-r--r-- | volk/orc/volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc (renamed from volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc (renamed from volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc (renamed from volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc (renamed from volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc | 2 | ||||
-rw-r--r-- | volk/orc/volk_16u_byteswap_a16_orc_impl.orc | 3 | ||||
-rw-r--r-- | volk/orc/volk_16u_byteswap_a_orc_impl.orc | 3 | ||||
-rw-r--r-- | volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc (renamed from volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc | 6 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc (renamed from volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc (renamed from volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc (renamed from volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc (renamed from volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc (renamed from volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc (renamed from volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc (renamed from volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_8i_convert_16i_a_orc_impl.orc (renamed from volk/orc/volk_8i_convert_16i_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc (renamed from volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc) | 2 | ||||
-rw-r--r-- | volk/volk.pc.in | 5 |
182 files changed, 3571 insertions, 2473 deletions
diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt new file mode 100644 index 000000000..22c09b3f8 --- /dev/null +++ b/volk/CMakeLists.txt @@ -0,0 +1,88 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Project setup +######################################################################## +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +IF(NOT DEFINED CMAKE_BUILD_TYPE) + SET(CMAKE_BUILD_TYPE Release) +ENDIF() +SET(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel") +PROJECT(volk) +ENABLE_LANGUAGE(CXX) +ENABLE_LANGUAGE(C) +ENABLE_TESTING() +SET(VERSION 0.1) +SET(LIBVER 0.0.0) + +######################################################################## +# Dependencies setup +######################################################################## +FIND_PACKAGE(PythonInterp) +IF(NOT PYTHONINTERP_FOUND) + MESSAGE(FATAL_ERROR "Python interpreter required by the build system.") +ENDIF(NOT PYTHONINTERP_FOUND) + +######################################################################## +# Setup the package config file +######################################################################## +#set variables found in the pc.in file +SET(prefix ${CMAKE_INSTALL_PREFIX}) +SET(exec_prefix "\${prefix}") 
+SET(libdir "\${exec_prefix}/lib${LIB_SUFFIX}") +SET(includedir "\${prefix}/include") + +CONFIGURE_FILE( + ${CMAKE_CURRENT_SOURCE_DIR}/volk.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/volk.pc +@ONLY) + +INSTALL( + FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc + DESTINATION lib${LIB_SUFFIX}/pkgconfig +) + +######################################################################## +# Install all headers in the include directories +######################################################################## +INSTALL( + DIRECTORY ${CMAKE_SOURCE_DIR}/include/volk + DESTINATION include FILES_MATCHING PATTERN "*.h" +) + +INSTALL(FILES + ${CMAKE_BINARY_DIR}/include/volk/volk.h + ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h + ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h + ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h +DESTINATION include/volk) + +######################################################################## +# Setup the library +######################################################################## +ADD_SUBDIRECTORY(lib) + +######################################################################## +# And the utility apps +######################################################################## +ADD_SUBDIRECTORY(apps) + +######################################################################## +# Print summary +######################################################################## +MESSAGE(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}") diff --git a/volk/Makefile.am b/volk/Makefile.am index 03c5aac35..829c37b78 100644 --- a/volk/Makefile.am +++ b/volk/Makefile.am @@ -23,7 +23,7 @@ ACLOCAL_AMFLAGS = -I config include $(top_srcdir)/Makefile.common -EXTRA_DIST = bootstrap configure config.h.in volk_config.h +EXTRA_DIST = bootstrap configure config.h.in SUBDIRS = config if LV_HAVE_ORC SUBDIRS += orc @@ -38,7 +38,6 @@ pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = volk.pc distclean-local: - -rm -f config/lv_set_simd_flags.m4 -rm -rf autom4te.cache -rm -f config.* 
-rm -f depcomp @@ -61,3 +60,6 @@ distclean-local: -rm -f configure -rm -f orc/Makefile.in -rm -f orc/*.c + -rm -rf gen/config + -rm -rf gen/include + -rm -rf gen/lib diff --git a/volk/Makefile.common b/volk/Makefile.common index eca2c6516..b9949cb33 100644 --- a/volk/Makefile.common +++ b/volk/Makefile.common @@ -20,21 +20,8 @@ # Boston, MA 02110-1301, USA. # -if MD_CPU_generic - platform_CODE = \ - $(top_srcdir)/lib/volk_cpu_generic.c -endif - -if MD_CPU_x86 - platform_CODE = \ - $(top_srcdir)/lib/volk_cpu_x86.c -endif - -if MD_CPU_powerpc - platform_CODE = \ - $(top_srcdir)/lib/volk_cpu_powerpc.c -endif - +#define gendir for files generated during bootstrap +top_gendir = $(top_srcdir)/gen ourincludedir = $(includedir)/volk diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt new file mode 100644 index 000000000..a0bf7e900 --- /dev/null +++ b/volk/apps/CMakeLists.txt @@ -0,0 +1,38 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +######################################################################## +# Setup profiler +######################################################################## +IF(MSVC) + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) +ENDIF(MSVC) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_SOURCE_DIR}/lib + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc +) + +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc new file mode 100644 index 000000000..b9ac9ecc2 --- /dev/null +++ b/volk/apps/volk_profile.cc @@ -0,0 +1,135 @@ +#include "qa_utils.h" +extern "C" { +#include <volk/volk.h> +#include <volk/volk_prefs.h> +} +#include <vector> +#include <boost/foreach.hpp> +#include <iostream> +#include <fstream> +#include <sys/stat.h> +#include <sys/types.h> + +int main(int argc, char *argv[]) { + + std::vector<std::string> results; + + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); 
+ VOLK_PROFILE(volk_16i_convert_8i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 204600, 100, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, 
&results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a, 1, 128, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_power_32f_a, 1e-4, 4, 204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a, 1e-4, 0, 204600, 100, &results); + 
VOLK_PROFILE(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 204600, 400, &results); + VOLK_PROFILE(volk_8i_convert_16i_a, 0, 0, 204600, 20000, &results); + VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results); + 
VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + char path[256]; + get_config_path(path); + std::string config_path(path); + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + if(!config.is_open()) { //either we don't have write access or we don't have the dir yet + std::string dir(getenv("HOME")); + dir += "/.volk"; + if(mkdir(dir.c_str(), 0777) == -1) { + std::cout << "Error creating directory " << dir << std::endl; + return -1; + } + config.open(config_path.c_str()); + if(!config.is_open()) { + std::cout << "Error opening file " << config_path << std::endl; + return -1; + } + } + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); +} diff --git a/volk/bootstrap b/volk/bootstrap index ff239c88c..838f03aa2 100755 --- a/volk/bootstrap +++ b/volk/bootstrap @@ -20,8 +20,14 @@ # Boston, MA 02110-1301, USA. rm -fr config.cache autom4te*.cache -cd include/volk && chmod +x volk_register.py && ./volk_register.py && cd ../.. 
-aclocal -I config +#alternative to -B that wont break on python 2.5 +PYTHONDONTWRITEBYTECODE=1 +export PYTHONDONTWRITEBYTECODE +python gen/volk_register.py + +mv gen/lib/Makefile.am lib/ + +aclocal -I config -I gen/config autoconf autoheader libtoolize --automake diff --git a/volk/config/Makefile.am b/volk/config/Makefile.am index 27e3f1296..d4786f83a 100644 --- a/volk/config/Makefile.am +++ b/volk/config/Makefile.am @@ -30,7 +30,6 @@ m4macros = \ ax_boost_base.m4 \ ax_boost_unit_test_framework.m4 \ bnv_have_qt.m4 \ - cppunit.m4 \ gr_lib64.m4 \ gr_libgnuradio_core_extra_ldflags.m4 \ gr_no_undefined.m4 \ @@ -43,7 +42,7 @@ m4macros = \ lf_cxx.m4 \ lf_warnings.m4 \ lf_x11.m4 \ - lv_set_simd_flags.m4 \ + $(top_gendir)/config/lv_set_simd_flags.m4 \ mkstemp.m4 \ onceonly.m4 \ pkg.m4 \ diff --git a/volk/config/cppunit.m4 b/volk/config/cppunit.m4 deleted file mode 100644 index 0991d51ec..000000000 --- a/volk/config/cppunit.m4 +++ /dev/null @@ -1,80 +0,0 @@ -dnl -dnl AM_PATH_CPPUNIT(MINIMUM-VERSION, [ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) -dnl -AC_DEFUN([AM_PATH_CPPUNIT], -[ - -AC_ARG_WITH(cppunit-prefix,[ --with-cppunit-prefix=PFX Prefix where CppUnit is installed (optional)], - cppunit_config_prefix="$withval", cppunit_config_prefix="") -AC_ARG_WITH(cppunit-exec-prefix,[ --with-cppunit-exec-prefix=PFX Exec prefix where CppUnit is installed (optional)], - cppunit_config_exec_prefix="$withval", cppunit_config_exec_prefix="") - - if test x$cppunit_config_exec_prefix != x ; then - cppunit_config_args="$cppunit_config_args --exec-prefix=$cppunit_config_exec_prefix" - if test x${CPPUNIT_CONFIG+set} != xset ; then - CPPUNIT_CONFIG=$cppunit_config_exec_prefix/bin/cppunit-config - fi - fi - if test x$cppunit_config_prefix != x ; then - cppunit_config_args="$cppunit_config_args --prefix=$cppunit_config_prefix" - if test x${CPPUNIT_CONFIG+set} != xset ; then - CPPUNIT_CONFIG=$cppunit_config_prefix/bin/cppunit-config - fi - fi - - AC_PATH_PROG(CPPUNIT_CONFIG, cppunit-config, no) 
- cppunit_version_min=$1 - - AC_MSG_CHECKING(for Cppunit - version >= $cppunit_version_min) - no_cppunit="" - if test "$CPPUNIT_CONFIG" = "no" ; then - no_cppunit=yes - else - CPPUNIT_CFLAGS=`$CPPUNIT_CONFIG --cflags` - CPPUNIT_LIBS=`$CPPUNIT_CONFIG --libs` - cppunit_version=`$CPPUNIT_CONFIG --version` - - cppunit_major_version=`echo $cppunit_version | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\1/'` - cppunit_minor_version=`echo $cppunit_version | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\2/'` - cppunit_micro_version=`echo $cppunit_version | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\3/'` - - cppunit_major_min=`echo $cppunit_version_min | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\1/'` - cppunit_minor_min=`echo $cppunit_version_min | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\2/'` - cppunit_micro_min=`echo $cppunit_version_min | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\3/'` - - cppunit_version_proper=`expr \ - $cppunit_major_version \> $cppunit_major_min \| \ - $cppunit_major_version \= $cppunit_major_min \& \ - $cppunit_minor_version \> $cppunit_minor_min \| \ - $cppunit_major_version \= $cppunit_major_min \& \ - $cppunit_minor_version \= $cppunit_minor_min \& \ - $cppunit_micro_version \>= $cppunit_micro_min ` - - if test "$cppunit_version_proper" = "1" ; then - AC_MSG_RESULT([$cppunit_major_version.$cppunit_minor_version.$cppunit_micro_version]) - else - AC_MSG_RESULT(no) - no_cppunit=yes - fi - fi - - if test "x$no_cppunit" = x ; then - ifelse([$2], , :, [$2]) - else - CPPUNIT_CFLAGS="" - CPPUNIT_LIBS="" - ifelse([$3], , :, [$3]) - fi - - AC_SUBST(CPPUNIT_CFLAGS) - AC_SUBST(CPPUNIT_LIBS) -]) - - - diff --git a/volk/config/lf_warnings.m4 b/volk/config/lf_warnings.m4 index d40c77f14..e62fb276c 100644 --- a/volk/config/lf_warnings.m4 +++ b/volk/config/lf_warnings.m4 @@ -29,7 +29,8 @@ dnl distribution terms that you use for the rest of that program. 
# ------------------------------------------------------------------------- AC_DEFUN([LF_CHECK_CXX_FLAG],[ - echo 'void f(){}' > conftest.cc + echo "#include <stdio.h> +int main(int argc, char **argv){return 0;}" > conftest.cc for i in $1 do AC_MSG_CHECKING([whether $CXX accepts $i]) @@ -54,7 +55,8 @@ AC_DEFUN([LF_CHECK_CXX_FLAG],[ # ------------------------------------------------------------------------- AC_DEFUN([LF_CHECK_CC_FLAG],[ - echo 'void f(){}' > conftest.c + echo "#include <stdio.h> +int main(int argc, char **argv){return 0;}" > conftest.c for i in $1 do AC_MSG_CHECKING([whether $CC accepts $i]) diff --git a/volk/config/lv_configure.m4 b/volk/config/lv_configure.m4 index dfa490cdf..358fba030 100755..100644 --- a/volk/config/lv_configure.m4 +++ b/volk/config/lv_configure.m4 @@ -109,14 +109,6 @@ dnl AM_CONDITIONAL([USE_PYTHON], [test "$with_python" = yes]) AC_CHECK_PROG([XMLTO],[xmlto],[yes],[]) AM_CONDITIONAL([HAS_XMLTO], [test x$XMLTO = xyes]) - dnl Define where to look for cppunit includes and libs - dnl sets CPPUNIT_CFLAGS and CPPUNIT_LIBS - dnl Try using pkg-config first, then fall back to cppunit-config. - PKG_CHECK_EXISTS(cppunit, - [PKG_CHECK_MODULES(CPPUNIT, cppunit >= 1.9.14)], - [AM_PATH_CPPUNIT([1.9.14],[], - [AC_MSG_ERROR([VOLK requires cppunit. 
Stop])])]) - dnl PKG_CHECK_MODULES(GNURADIO_CORE, gnuradio-core >= 3) dnl LIBS="$LIBS $GNURADIO_CORE_LIBS" ]) diff --git a/volk/configure.ac b/volk/configure.ac index c493adad6..fa3a90c7f 100644 --- a/volk/configure.ac +++ b/volk/configure.ac @@ -59,7 +59,6 @@ dnl AX_BOOST_TEST_EXEC_MONITOR AX_BOOST_UNIT_TEST_FRAMEWORK dnl AX_BOOST_WSERIALIZATION -AC_CONFIG_HEADERS([volk_config.h]) LV_SET_SIMD_FLAGS AC_CONFIG_FILES([\ diff --git a/volk/gen/.gitignore b/volk/gen/.gitignore new file mode 100644 index 000000000..a1c468f93 --- /dev/null +++ b/volk/gen/.gitignore @@ -0,0 +1,3 @@ +/config +/include +/lib diff --git a/volk/include/volk/archs.xml b/volk/gen/archs.xml index a19a5add9..f6822871f 100644 --- a/volk/include/volk/archs.xml +++ b/volk/gen/archs.xml @@ -5,14 +5,14 @@ <flag>none</flag> </arch> -<arch name="orc" type="all"> - <flag>lorc-0.4</flag> - <overrule>LV_HAVE_ORC</overrule> - <overrule_val>no</overrule_val> -</arch> - <arch name="altivec" type="powerpc"> <flag>maltivec</flag> + <alignment>16</alignment> +</arch> + +<arch name="neon" type="arm"> + <flag>mfpu=neon -mfloat-abi=softfp -funsafe-math-optimizations</flag> + <alignment>16</alignment> </arch> <arch name="32" type="x86" no_test="true" > @@ -37,6 +37,7 @@ <shift>31</shift> <flag>m3dnow</flag> <val>1</val> + <alignment>8</alignment> </arch> <arch name="abm" type="x86"> @@ -45,6 +46,7 @@ <reg>d</reg> <shift>5</shift> <flag>sse4.2</flag> + <alignment>16</alignment> </arch> <arch name="popcount" type="x86"> @@ -61,6 +63,7 @@ <reg>d</reg> <shift>23</shift> <flag>mmmx</flag> + <alignment>8</alignment> </arch> @@ -72,6 +75,7 @@ <flag>msse</flag> <environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment> <include>xmmintrin.h</include> + <alignment>16</alignment> </arch> @@ -81,6 +85,13 @@ <reg>d</reg> <shift>26</shift> <flag>msse2</flag> + <alignment>16</alignment> +</arch> + +<arch name="orc" type="all"> + <flag>lorc-0.4</flag> + <overrule>LV_HAVE_ORC</overrule> + <overrule_val>no</overrule_val> 
</arch> <arch name="sse3" type="x86"> @@ -91,6 +102,7 @@ <flag>msse3</flag> <environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment> <include>pmmintrin.h</include> + <alignment>16</alignment> </arch> <arch name="ssse3" type="x86"> @@ -99,6 +111,7 @@ <reg>c</reg> <shift>9</shift> <flag>mssse3</flag> + <alignment>16</alignment> </arch> <arch name="sse4_a" type="x86"> @@ -107,6 +120,7 @@ <reg>c</reg> <shift>6</shift> <flag>msse4a</flag> + <alignment>16</alignment> </arch> @@ -116,6 +130,7 @@ <reg>c</reg> <shift>19</shift> <flag>msse4.1</flag> + <alignment>16</alignment> </arch> <arch name="sse4_2" type="x86"> @@ -124,16 +139,16 @@ <reg>c</reg> <shift>20</shift> <flag>msse4.2</flag> + <alignment>16</alignment> </arch> - <arch name="avx" type="x86"> <val>1</val> <op>1</op> <reg>c</reg> <shift>28</shift> <flag>mavx</flag> + <alignment>32</alignment> </arch> - </grammar> diff --git a/volk/gen/machines.xml b/volk/gen/machines.xml new file mode 100644 index 000000000..b872b9fb1 --- /dev/null +++ b/volk/gen/machines.xml @@ -0,0 +1,64 @@ +<grammar> + +<machine name="generic"> +<archs>generic</archs> +</machine> + +<!-- +<machine name="mmx"> +<archs>generic 32|64 mmx</archs> +</machine> + +<machine name="sse"> +<archs>generic 32|64 mmx sse</archs> +</machine> +--> + +<!-- +Create an SSE2 only machine (without 64/32 inline assembly support). +This machine is intended to support the MSVC compiler on x86/amd64. +The MSVC compiler has intrinsic support for SSE and SSE2, +however it does not support the gcc style inline assembly. 
+--> + +<machine name="neon"> +<archs>generic neon</archs> +</machine> + +<machine name="sse2_only"> +<archs>generic mmx sse sse2</archs> +</machine> + +<machine name="sse2"> +<archs>generic 32|64 mmx sse sse2</archs> +</machine> + +<machine name="sse3"> +<archs>generic 32|64 mmx sse sse2 sse3</archs> +</machine> + +<machine name="ssse3"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3</archs> +</machine> + +<machine name="sse4_a"> +<archs>generic 32|64 mmx sse sse2 sse3 sse4_a popcount</archs> +</machine> + +<machine name="sse4_1"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1</archs> +</machine> + +<machine name="sse4_2"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount</archs> +</machine> + +<machine name="avx"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx</archs> +</machine> + +<machine name="altivec"> +<archs>generic altivec</archs> +</machine> + +</grammar> diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py new file mode 100644 index 000000000..19d679e71 --- /dev/null +++ b/volk/gen/make_c.py @@ -0,0 +1,88 @@ +# +# Copyright 2010-2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from volk_regexp import * +import string + +#ok todo list: +#put n_archs into the info struct so it doesn't have to be arch_defs[0]. 
def make_c(machines, archs, functions, arched_arglist, my_arglist):
    """Build the C source text for the runtime dispatcher.

    The generated text contains:
      * get_machine(): picks, once, the compiled-in machine with the highest
        capability mask whose capabilities are all reported by
        volk_get_lvarch();
      * volk_get_alignment();
      * for every entry in `functions`: a lazy-binding trampoline
        (get_<func>), the public function pointer initialised to that
        trampoline, a <func>_manual variant and a <func>_get_func_desc
        accessor.

    machines, archs -- accepted for interface compatibility; not read here.
    functions       -- list of kernel names.
    arched_arglist  -- per-function argument list text that includes the
                       trailing arch parameter (replace_arch strips it for
                       the trampoline).
    my_arglist      -- per-function call-argument text.

    Returns the generated C source as a single string.
    """
    tempstring = r"""
// This file is automatically generated by make_c.py.
// Do not edit this file.
"""
    # stdlib.h is needed for abort() in get_machine() below.
    tempstring += """
#include <volk/volk_common.h>
#include "volk_machines.h"
#include <volk/volk_typedefs.h>
#include <volk/volk_cpu.h>
#include "volk_rank_archs.h"
#include <volk/volk.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

"""

    # The .h prototypes the functions; this .c implements them as function
    # pointers, plus the get_machine call and the default call for each fn.
    #
    # Changes relative to the previous template:
    #  - volk_get_lvarch() is evaluated once instead of once per machine;
    #  - the loop index is unsigned, matching n_volk_machines;
    #  - a missing match is reported instead of dereferencing NULL in printf.
    tempstring += r"""
struct volk_machine *get_machine(void) {
    extern struct volk_machine *volk_machines[];
    extern unsigned int n_volk_machines;
    static struct volk_machine *machine = NULL;

    if(machine != NULL) return machine;
    else {
        unsigned int max_score = 0;
        unsigned int i;
        const unsigned int lvarch = volk_get_lvarch();
        for(i=0; i<n_volk_machines; i++) {
            if(!(volk_machines[i]->caps & (~lvarch))) {
                if(volk_machines[i]->caps > max_score) {
                    max_score = volk_machines[i]->caps;
                    machine = volk_machines[i];
                }
            }
        }
        if(machine == NULL) {
            fprintf(stderr, "Volk: no compiled machine is supported by this CPU\n");
            abort();
        }
        printf("Using Volk machine: %s\n", machine->name);
        return machine;
    }
}

unsigned int volk_get_alignment(void) {
    return get_machine()->alignment;
}

"""

    for i in range(len(functions)):
        name = functions[i]
        # Trampoline: on first call, bind the public pointer to the best
        # implementation and forward the call.
        tempstring += "void get_" + name + replace_arch.sub("", arched_arglist[i]) + "\n"
        tempstring += "    %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs, get_machine()->%s_name, volk_get_lvarch())];\n" % ((name,) * 6)
        tempstring += "    %s(%s);\n}\n\n" % (name, my_arglist[i])
        # Public function pointer, initially aimed at the trampoline.
        tempstring += replace_volk.sub("p", name) + " " + name + " = &get_" + name + ";\n\n"
        # Manual dispatch by architecture name.
        tempstring += "void %s_manual%s\n" % (name, arched_arglist[i])
        tempstring += "    get_machine()->%s_archs[get_index(get_machine()->%s_indices, get_machine()->%s_n_archs, arch)](%s);\n}\n" % (name, name, name, my_arglist[i])
        # Descriptor accessor.
        tempstring += "struct volk_func_desc %s_get_func_desc(void) {\n" % (name)
        tempstring += "    struct volk_func_desc desc = {get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs};\n" % (name, name, name)
        tempstring += "    return desc;\n}\n"

    return tempstring
from xml.dom import minidom

# Fixed preamble of the generated CPU-detection C file.
# NOTE: both compiler branches must define the same macro name, cpuid_x86,
# because the accessor functions below call cpuid_x86 unconditionally.
HEADER_TEMPL = """\
/*this file is auto_generated by volk_register.py*/

#include <volk/volk_cpu.h>
#include <volk/volk_config_fixed.h>

struct VOLK_CPU volk_cpu;

#if defined(__i386__) || defined(__x86_64__)

//implement get cpuid for gcc compilers using a copy of cpuid.h
#if defined(__GNUC__)
#include <gcc_x86_cpuid.h>
#define cpuid_x86(op, r) __get_cpuid(op, r+0, r+1, r+2, r+3)

//implement get cpuid for MSVC compilers using __cpuid intrinsic
#elif defined(_MSC_VER)
#include <intrin.h>
#define cpuid_x86(op, r) __cpuid((int *)(r), op)

#else
#error "A get cpuid for volk is not available on this compiler..."
#endif

static inline unsigned int cpuid_eax(unsigned int op) {
    unsigned int regs[4];
    cpuid_x86 (op, regs);
    return regs[0];
}

static inline unsigned int cpuid_ebx(unsigned int op) {
    unsigned int regs[4];
    cpuid_x86 (op, regs);
    return regs[1];
}

static inline unsigned int cpuid_ecx(unsigned int op) {
    unsigned int regs[4];
    cpuid_x86 (op, regs);
    return regs[2];
}

static inline unsigned int cpuid_edx(unsigned int op) {
    unsigned int regs[4];
    cpuid_x86 (op, regs);
    return regs[3];
}
#endif

"""


def _first_text(domarch, tag):
    """Return the text of the first <tag> element under domarch, or None."""
    nodes = domarch.getElementsByTagName(tag)
    if nodes:
        return str(nodes[0].firstChild.data)
    return None


def make_cpuid_c(dom):
    """Generate the C source that detects CPU features at run time.

    dom -- iterable of minidom <arch> elements. Each must carry "name" and
           "type" attributes; x86 entries may carry no_test="true" and the
           child elements <op>, <reg>, <shift>, <val>.

    For every arch an `int i_can_has_<name>()` function is emitted (x86
    entries only when no_test is set or <op> is "1" or "0x80000001"),
    followed by volk_cpu_init(), which stores those functions in volk_cpu,
    and volk_get_lvarch(), which folds the results into a bit mask.

    Returns the generated C source as a string.
    """
    tempstring = HEADER_TEMPL

    for domarch in dom:
        arch_type = str(domarch.attributes["type"].value)
        arch = str(domarch.attributes["name"].value)

        if arch_type == "x86":
            no_test = False
            if "no_test" in domarch.attributes.keys():
                no_test = str(domarch.attributes["no_test"].value) == "true"
            op = _first_text(domarch, "op")
            reg = _first_text(domarch, "reg")
            shift = _first_text(domarch, "shift")
            val = _first_text(domarch, "val")

            if no_test:
                # Feature is implied by the target itself (e.g. word size).
                tempstring = tempstring + """\
int i_can_has_%s () {
#if defined(__i386__) || defined(__x86_64__)
    return 1;
#else
    return 0;
#endif
}

""" % (arch)

            elif op == "1":
                # Standard feature leaf.
                tempstring = tempstring + """\
int i_can_has_%s () {
#if defined(__i386__) || defined(__x86_64__)
    unsigned int e%sx = cpuid_e%sx (%s);
    return ((e%sx >> %s) & 1) == %s;
#else
    return 0;
#endif
}

""" % (arch, reg, reg, op, reg, shift, val)

            elif op == "0x80000001":
                # Extended feature leaf: first make sure the leaf exists.
                tempstring = tempstring + """\
int i_can_has_%s () {
#if defined(__i386__) || defined(__x86_64__)
    unsigned int extended_fct_count = cpuid_eax(0x80000000);
    if (extended_fct_count < 0x80000001)
        return %s^1;
    unsigned int extended_features = cpuid_e%sx (%s);
    return ((extended_features >> %s) & 1) == %s;
#else
    return 0;
#endif
}

""" % (arch, val, reg, op, shift, val)

        elif arch_type == "powerpc":
            tempstring = tempstring + """\
int i_can_has_%s () {
#ifdef __PPC__
    return 1;
#else
    return 0;
#endif
}

""" % (arch)

        elif arch_type == "arm":
            # The read loop is driven by fread's return value so that it
            # terminates at end of file when the NEON bit is never seen.
            tempstring = tempstring + """\
#if defined(__arm__) && defined(__linux__)
#include <asm/hwcap.h>
#include <linux/auxvec.h>
#include <stdio.h>
#define LOOK_FOR_NEON
#endif

int i_can_has_%s () {
//it's linux-specific, but if you're compiling libvolk for NEON
//on Windows you have other problems

#ifdef LOOK_FOR_NEON
    FILE *auxvec_f;
    unsigned long auxvec[2];
    unsigned int found_neon = 0;
    auxvec_f = fopen("/proc/self/auxv", "rb");
    if(!auxvec_f) return 0;

    //auxv is a sequence of (id, value) pairs of unsigned long;
    //stop at the first short read (end of file or error)
    while(!found_neon && fread(auxvec, sizeof(unsigned long), 2, auxvec_f) == 2) {
        if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
            found_neon = 1;
    }

    fclose(auxvec_f);
    return found_neon;

#else
    return 0;
#endif
}

""" % (arch)

        elif arch_type == "all":
            tempstring = tempstring + """\
int i_can_has_%s () {
    return 1;
}

""" % (arch)

        else:
            # Unknown arch type: never report it as available.
            tempstring = tempstring + """\
int i_can_has_%s () {
    return 0;
}

""" % (arch)

    tempstring = tempstring + "void volk_cpu_init() {\n"
    for domarch in dom:
        arch = str(domarch.attributes["name"].value)
        tempstring = tempstring + "    volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n"
    tempstring = tempstring + "}\n\n"

    tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"
    tempstring = tempstring + "    unsigned int retval = 0;\n"
    tempstring = tempstring + "    volk_cpu_init();\n"
    for domarch in dom:
        arch = str(domarch.attributes["name"].value)
        tempstring = tempstring + "    retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n"
    tempstring = tempstring + "    return retval;\n"
    tempstring = tempstring + "}\n\n"

    return tempstring
def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist, alignment):
    """Return the C initializer text for `struct volk_machine volk_machine_<name>`.

    For each function, only the implementations whose required archs
    (lower-cased) are all present in `archs` are kept; their tags, arch
    masks and symbol names are emitted in that order, followed by the count.
    """
    # Narrow every function's implementation list to what this machine supports.
    supported = set(archs)
    machine_fcountlists = []
    machine_taglists = []
    for func_idx, impls in enumerate(fcountlist):
        kept_fcounts = []
        kept_tags = []
        for impl_idx, needed in enumerate(impls):
            lowered = [str.lower(a) for a in needed]
            if len(supported.intersection(lowered)) == len(needed):
                kept_fcounts.append(needed)
                kept_tags.append(taglist[func_idx][impl_idx])
        machine_fcountlists.append(kept_fcounts)
        machine_taglists.append(kept_tags)

    # Emit the struct: caps mask, name and alignment first.
    caps = ' | '.join("(1 << LV_" + arch.swapcase() + ")" for arch in archs)
    pieces = [
        "struct volk_machine volk_machine_" + machine_name + " = {\n",
        "    " + caps + ",\n",
        "    \"%s\",\n" % machine_name,
        "    %s,\n" % alignment,
    ]

    # Then one five-field group per function.
    for idx, func in enumerate(functions):
        tags = machine_taglists[idx]
        fcounts = machine_fcountlists[idx]
        tag_names = ', '.join('"%s"' % tag for tag in tags)
        arch_masks = ', '.join(
            ' | '.join('(1 << LV_%s)' % fc for fc in fcount) for fcount in fcounts
        )
        symbols = ', '.join('%s_%s' % (func, tag) for tag in tags)
        pieces.append("    \"%s\",\n" % func)
        pieces.append("    {%s},\n" % tag_names)
        pieces.append("    {%s},\n" % arch_masks)
        pieces.append("    {%s},\n" % symbols)
        pieces.append("    %d,\n" % len(tags))

    tempstring = strip_trailing(''.join(pieces), ",")
    tempstring += "};\n"
    return tempstring


def make_each_machine_c(machine_name, archs, functions, fcountlist, taglist, alignment):
    """Return the C source text for one machine.

    The text defines LV_HAVE_<ARCH> for every arch, includes each function's
    header, and provides two alternative struct definitions selected by
    LV_HAVE_ORC: one built with "orc" appended to `archs`, one without.
    """
    with_orc = _make_each_machine_struct(
        machine_name, archs + ["orc"], functions, fcountlist, taglist, alignment)
    without_orc = _make_each_machine_struct(
        machine_name, archs, functions, fcountlist, taglist, alignment)

    tempstring = r"""
// This file is automatically generated by make_each_machine_c.py.
// Do not edit this file.
"""
    tempstring += ''.join(
        "#define LV_HAVE_" + arch.swapcase() + " 1\n" for arch in archs)

    tempstring += """
#include <volk/volk_common.h>
#include "volk_machines.h"
#include <volk/volk_config_fixed.h>

"""
    tempstring += ''.join("#include <volk/" + func + ".h>\n" for func in functions)
    tempstring += "\n\n"

    tempstring += """
#ifdef LV_HAVE_ORC
%s
#else
%s
#endif
""" % (with_orc, without_orc)

    return tempstring
def make_h(funclist, arched_arglist):
    """Return the text of the public runtime header.

    The header declares struct volk_func_desc and volk_get_alignment(), and
    for every name in `funclist` the function pointer, the `_manual` variant
    (using the matching `arched_arglist` entry) and the `_get_func_desc`
    accessor.
    """
    tempstring = (
        '/*this file is auto generated by make_h.py*/\n'
        '\n#ifndef INCLUDED_VOLK_RUNTIME'
        '\n#define INCLUDED_VOLK_RUNTIME'
        '\n\n#include<volk/volk_typedefs.h>\n'
        '#include<volk/volk_config_fixed.h>\n'
        '#include<volk/volk_common.h>\n'
        '#include<volk/volk_complex.h>\n'
        '__VOLK_DECL_BEGIN\n'
        '\n'
    )

    tempstring += """
struct volk_func_desc {
    const char **indices;
    const int *arch_defs;
    const int n_archs;
};

VOLK_API unsigned int volk_get_alignment(void);

"""
    for func, arglist in zip(funclist, arched_arglist):
        tempstring += "extern VOLK_API " + replace_volk.sub("p", func) + " " + func + ";\n"
        tempstring += "extern VOLK_API void %s_manual%s;\n" % (func, arglist)
        # Applied to the accumulated text, exactly as before.
        tempstring = strip_trailing(tempstring, " {")
        tempstring += "extern VOLK_API struct volk_func_desc %s_get_func_desc(void);\n" % (func)

    tempstring += '__VOLK_DECL_END\n'
    tempstring += "#endif /*INCLUDED_VOLK_RUNTIME*/\n"

    return tempstring
def make_machines_c(machines):
    """Return the C source defining the volk_machines[] table.

    Each name in `machines` contributes one `&volk_machine_<name>,` entry
    guarded by `#if LV_MACHINE_<NAME>`; n_volk_machines is derived with
    sizeof.
    """
    prologue = r"""
// This file is automatically generated by make_machines_c.py.
// Do not edit this file.

#include <volk/volk_common.h>
#include <volk/volk_typedefs.h>
#include "volk_machines.h"

struct volk_machine *volk_machines[] = {
"""
    epilogue = r"""
};
unsigned int n_volk_machines = sizeof(volk_machines)/sizeof(*volk_machines);
"""
    entries = []
    for machine in machines:
        entries.append(
            "#if LV_MACHINE_" + machine.swapcase() + "\n"
            + "&volk_machine_" + machine + ","
            + "\n#endif\n"
        )
    return prologue + ''.join(entries) + epilogue


def make_machines_h(functions, machines, archs):
    """Return the text of the header declaring struct volk_machine.

    The struct carries caps, name and alignment, then five members per
    entry in `functions` (arrays are sized by len(archs)). An extern
    declaration guarded by `#if LV_MACHINE_<NAME>` follows for each name in
    `machines`. The returned text has no trailing newline.
    """
    n_archs = len(archs)

    tempstring = r"""
// This file is automatically generated by make_machines_h.py.
// Do not edit this file.

#ifndef INCLUDED_LIBVOLK_MACHINES_H
#define INCLUDED_LIBVOLK_MACHINES_H

#include <volk/volk_common.h>
#include <volk/volk_typedefs.h>

__VOLK_DECL_BEGIN

struct volk_machine {
    const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format)
    const char *name;
    const unsigned int alignment; //the maximum byte alignment required for functions in this library
"""
    for function in functions:
        ptr_type = replace_volk.sub("p", function)
        tempstring += "    const char *%s_name;\n" % function
        tempstring += "    const char *%s_indices[%d];\n" % (function, n_archs)
        tempstring += "    const int %s_arch_defs[%d];\n" % (function, n_archs)
        tempstring += "    const %s %s_archs[%d];\n" % (ptr_type, function, n_archs)
        tempstring += "    const int %s_n_archs;\n" % function

    tempstring += r"""};

"""
    for machine in machines:
        tempstring += (
            "#if LV_MACHINE_" + machine.swapcase() + "\n"
            + "extern struct volk_machine volk_machine_" + machine + ";\n"
            + "#endif\n"
        )

    tempstring += r"""

__VOLK_DECL_END

#endif //INCLUDED_LIBVOLK_MACHINES_H"""

    return tempstring
# If not, see <http://www.gnu.org/licenses/>.
#

from xml.dom import minidom

def make_makefile_am(dom, machines, archflags_dict):
    """Return the text of the generated lib/Makefile.am.

    dom            -- accepted for interface compatibility; not read here.
    machines       -- dict mapping machine name -> list of arch names.
    archflags_dict -- dict mapping arch name -> compiler flag without the
                      leading '-'; the value "none" means no flag.

    Between a fixed preamble and a fixed epilogue, one automake
    ``if LV_MACHINE_<NAME> ... endif`` stanza is emitted per machine. Each
    stanza declares a convenience library built from volk_machine_<name>.c
    with that machine's arch flags and adds it to libvolk.la.
    """
    preamble = r"""
# This file is automatically generated by make_makefile_am.py.
# Do not edit this file.

include $(top_srcdir)/Makefile.common

AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
 -I$(top_srcdir)/include \
 -I$(top_gendir)/include \
 -Dvolk_EXPORTS \
 -fvisibility=hidden \
 $(WITH_INCLUDES)

lib_LTLIBRARIES = \
 libvolk.la

EXTRA_DIST = \
 volk_rank_archs.h \
 gcc_x86_cpuid.h

# ----------------------------------------------------------------
# The main library
# ----------------------------------------------------------------

libvolk_la_SOURCES = \
 $(platform_CODE) \
 $(top_gendir)/lib/volk.c \
 $(top_gendir)/lib/volk_cpu.c \
 volk_rank_archs.c \
 volk_prefs.c \
 $(top_gendir)/lib/volk_machines.c

if LV_HAVE_ORC
volk_orc_CFLAGS = -DLV_HAVE_ORC=1
volk_orc_LDFLAGS = $(ORC_LDFLAGS) -lorc-0.4
volk_orc_LIBADD = ../orc/libvolk_orc.la
else
volk_orc_CFLAGS =
volk_orc_LDFLAGS =
volk_orc_LIBADD =
endif

libvolk_la_CPPFLAGS = $(AM_CPPFLAGS) $(volk_orc_CFLAGS)
libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
libvolk_la_LIBADD = $(volk_orc_LIBADD)

noinst_LTLIBRARIES =

"""

    epilogue = r"""

# ----------------------------------------------------------------
# The QA library. Note libvolk.la in LIBADD
# ----------------------------------------------------------------
#libvolk_qa_la_SOURCES = \
# qa_utils.cc

#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost

#libvolk_qa_la_LIBADD = \
# libvolk.la \
# libvolk_runtime.la

# ----------------------------------------------------------------
# headers that don't get installed
# ----------------------------------------------------------------
noinst_HEADERS = \
 $(top_gendir)/lib/volk_init.h \
 $(top_gendir)/lib/volk_machines.h \
 $(top_gendir)/lib/volk_environment_init.h \
 qa_utils.h

# ----------------------------------------------------------------
# Our test program
# ----------------------------------------------------------------
noinst_PROGRAMS = \
 testqa

testqa_SOURCES = testqa.cc qa_utils.cc
testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS)
testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
testqa_LDADD = \
 libvolk.la
"""

    #here be dragons
    stanzas = []
    for name in machines:
        macro = "LV_MACHINE_" + name.swapcase()
        lib = "libvolk_" + name
        # one "-<flag> " per arch of this machine, skipping the "none" marker
        arch_flags = "".join(
            "-" + archflags_dict[arch] + " "
            for arch in machines[name]
            if archflags_dict[arch] != "none")
        stanzas.append(
            "if " + macro + "\n"
            + lib + "_la_SOURCES = $(top_gendir)/lib/volk_machine_" + name + ".c\n"
            + lib + "_la_CPPFLAGS = -I$(top_srcdir)/include -I$(top_gendir)/include $(volk_orc_CFLAGS) " + arch_flags
            + "\nnoinst_LTLIBRARIES += " + lib + ".la "
            + "\nlibvolk_la_LIBADD += " + lib + ".la\n"
            + "libvolk_la_CPPFLAGS += -D" + macro + " \n"
            + "endif\n")

    return preamble + "".join(stanzas) + epilogue

# diff --git a/volk/include/volk/make_proccpu_sim.py b/volk/gen/make_proccpu_sim.py
# index 029dacfcc..029dacfcc 100644
# --- a/volk/include/volk/make_proccpu_sim.py
# +++ b/volk/gen/make_proccpu_sim.py
# diff --git a/volk/gen/make_set_simd.py b/volk/gen/make_set_simd.py
# new file mode 100644
# index 000000000..5a848e59e
# --- /dev/null
# +++ b/volk/gen/make_set_simd.py
# @@ -0,0 +1,166 @@
#
# Copyright 2010 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from xml.dom import minidom

def make_set_simd(dom, machines) :
    """Return the text of the autoconf macro file defining LV_SET_SIMD_FLAGS.

    dom      -- sequence of <arch> DOM elements. Each must carry "name" and
                "type" attributes and a <flag> child; <overrule> and
                <overrule_val> children are optional.
    machines -- dict mapping machine name -> list of arch names.

    The generated m4 defines one _TRY_ADD_<ARCH> macro per arch whose type is
    not "all", then LV_SET_SIMD_FLAGS, which sets LV_MAKE_<ARCH> shell
    variables and the LV_MAKE_<ARCH> / LV_MACHINE_<NAME> automake
    conditionals.

    All emitted shell comparisons use the POSIX ``test a = b`` form; ``==``
    is a bash extension that strict /bin/sh implementations reject.
    """
    names = [str(domarch.attributes["name"].value) for domarch in dom]

    tempstring = ""
    tempstring += 'dnl this file is auto generated by volk_register.py\n\n'

    tempstring += '\ndnl define arch checks\n'
    for domarch in dom:
        if str(domarch.attributes["type"].value) != "all":
            arch = str(domarch.attributes["name"].value)
            flag = domarch.getElementsByTagName("flag")
            flag = str(flag[0].firstChild.data)
            tempstring += "AC_DEFUN([_TRY_ADD_" + arch.swapcase() + "],\n"
            tempstring += "[\n"
            tempstring += " LF_CHECK_CC_FLAG([-" + flag + "])\n"
            tempstring += " LF_CHECK_CXX_FLAG([-" + flag + "])\n"
            tempstring += "])\n"

    tempstring += '\ndnl main set_simd_flags\n'
    tempstring += "AC_DEFUN([LV_SET_SIMD_FLAGS],\n"
    tempstring += "[\n"
    #tempstring += " AC_REQUIRE([GR_SET_MD_CPU])\n"
    tempstring += " AC_SUBST(LV_CXXFLAGS)\n"
    tempstring += " indCC=no\n"
    tempstring += " indCXX=no\n"
    tempstring += " indLV_ARCH=no\n"
    tempstring += " AC_ARG_WITH(lv_arch,\n"
    tempstring += " AC_HELP_STRING([--with-lv_arch=ARCH],[set volk hardware speedups as space separated string with elements from the following list("
    tempstring += ", ".join(names)
    tempstring += ")]),\n"
    tempstring += " [cf_with_lv_arch=\"$withval\"],\n"
    tempstring += " [cf_with_lv_arch=\"\"])\n"
    # NOTE(review): `domarch` here is whatever the last loop over `dom` left
    # behind, so only the final arch is examined -- confirm whether every
    # type="all" arch was meant to get an AC_DEFINE.
    if str(domarch.attributes["type"].value) == "all":
        arch = str(domarch.attributes["name"].value)
        tempstring += " AC_DEFINE(LV_MAKE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"
    tempstring += " ADDONS=\"\"\n"
    tempstring += " BUILT_ARCHS=\"\"\n"
    #tempstring += " _MAKE_FAKE_PROCCPU\n"
    tempstring += " OVERRULE_FLAG=\"no\"\n"
    tempstring += " if test -z \"$cf_with_lv_arch\"; then\n"
    tempstring += " cf_with_lv_arch=\"" + " ".join(names) + "\"\n"
    tempstring += " OVERRULE_FLAG=\"yes\"\n"
    tempstring += " fi\n"

    tempstring += '\ndnl init LV_MAKE_XXX and then try to add archs\n'
    for domarch in dom:
        if str(domarch.attributes["type"].value) != "all":
            arch = str(domarch.attributes["name"].value)
            tempstring += " LV_MAKE_" + arch.swapcase() + "=no\n"

    for domarch in dom:
        arch = str(domarch.attributes["name"].value)
        atype = str(domarch.attributes["type"].value)
        if atype != "all":
            tempstring += " _TRY_ADD_" + arch.swapcase() + "\n"

    for domarch in dom:
        arch = str(domarch.attributes["name"].value)
        atype = str(domarch.attributes["type"].value)
        tempstring += '\ndnl add in flags for arch ' + arch + '\n'
        overrule = domarch.getElementsByTagName("overrule")
        if overrule:
            overrule = str(overrule[0].firstChild.data)
        else:
            overrule = ""
        overrule_val = domarch.getElementsByTagName("overrule_val")
        if overrule_val:
            overrule_val = str(overrule_val[0].firstChild.data)
        else:
            overrule_val = ""
        flag = domarch.getElementsByTagName("flag")
        flag = str(flag[0].firstChild.data)

        # shell fragment shared by both branches: was this arch requested,
        # and is that request cancelled by the arch's overrule variable?
        requested = ""
        requested += " for i in $cf_with_lv_arch\n"
        requested += " do\n"
        requested += " if test \"X$i\" = X" + arch + "; then\n"
        requested += " indLV_ARCH=yes\n"
        requested += " fi\n"
        requested += " done\n"
        requested += " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" = \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" = \"yes\" && test \"$indLV_ARCH\" = \"yes\"; then\n"
        requested += " indLV_ARCH=no\n"
        requested += " fi\n"

        if atype != "all":
            tempstring += " for i in $lf_CXXFLAGS\n"
            tempstring += " do\n"
            tempstring += " if test \"X$i\" = X-" + flag +"; then\n"
            tempstring += " indCXX=yes\n"
            tempstring += " fi\n"
            tempstring += " done\n"
            tempstring += " for i in $lf_CFLAGS\n"
            tempstring += " do\n"
            tempstring += " if test \"X$i\" = X-" + flag +"; then\n"
            tempstring += " indCC=yes\n"
            tempstring += " fi\n"
            tempstring += " done\n"
            tempstring += requested

            tempstring += " if test \"$indCC\" = \"yes\" && test \"$indCXX\" = \"yes\" && test \"$indLV_ARCH\" = \"yes\"; then\n"

            #tempstring += " ADDONS=\"${ADDONS} -" + flag + "\"\n"
            tempstring += " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"
            tempstring += " LV_MAKE_" + arch.swapcase() + "=yes\n"
            tempstring += " fi\n"
            tempstring += " indCC=no\n"
            tempstring += " indCXX=no\n"
            tempstring += " indLV_ARCH=no\n"
        else:
            tempstring += requested
            tempstring += " if test \"$indLV_ARCH\" = \"yes\"; then\n"
            tempstring += " LV_MAKE_" + arch.swapcase() + "=yes\n"
            tempstring += " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"
            tempstring += " fi\n"
            tempstring += " indLV_ARCH=no\n"


    for domarch in dom:
        arch = str(domarch.attributes["name"].value)
        tempstring += " AM_CONDITIONAL(LV_MAKE_" + arch.swapcase() + ", test \"$LV_MAKE_" + arch.swapcase() + "\" = \"yes\")\n"

    tempstring += "\n"
    #now we can define the machines we're compiling
    for machine_name in machines:
        tempstring += " AM_CONDITIONAL(LV_MACHINE_" + machine_name.swapcase() + ", "
        marchlist = machines[machine_name]
        for march in marchlist:
            tempstring += "test \"$LV_MAKE_" + march.swapcase() + "\" = \"yes\" && "

        tempstring += "test true)\n" #just so we don't have to detect the last one in the group, i know
    tempstring += " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n"
    tempstring += "])\n"

    return tempstring

# diff --git a/volk/include/volk/make_typedefs.py b/volk/gen/make_typedefs.py
# index fe81cb2b0..8f9f2b55e 100644
# --- a/volk/include/volk/make_typedefs.py
# +++ b/volk/gen/make_typedefs.py
# @@ -16,7 +16,7 @@ def make_typedefs(funclist, retlist, my_argtypelist) :
#  tempstring = tempstring + '\n';
#  for i in range(len(funclist)):
# - tempstring = tempstring + "typedef " + retlist[i] +" (*" + replace_volk.sub("p", funclist[i]) + ")(" + my_argtypelist[i] + ");\n\n";
# + tempstring = tempstring + "typedef " + retlist[i] +" (*" + replace_volk.sub("p", funclist[i]) + ")(" + my_argtypelist[i] + ");\n";
#  tempstring = tempstring + "#endif /*INCLUDED_VOLK_TYPEDEFS*/\n";
# diff --git a/volk/include/volk/volk_regexp.py
# b/volk/gen/volk_regexp.py
# index 7b695cb3b..b83ce5206 100644
# --- a/volk/include/volk/volk_regexp.py
# +++ b/volk/gen/volk_regexp.py
import re
import string

remove_after_underscore = re.compile("_.*");
space_remove = re.compile(" ");
leading_space_remove = re.compile("^ *");
# raw string: same pattern as before, without the invalid "\*" escape
replace_arch = re.compile(r", const char\* arch");
replace_bracket = re.compile(" {");
replace_volk = re.compile("volk");

def strip_trailing(tostrip, stripstr):
    """Return `tostrip` with its last occurrence of `stripstr` removed.

    If `stripstr` does not occur, `tostrip` is returned unchanged.

    Uses plain slicing rather than string.replace(), which only exists in
    Python 2; the result is the same on Python 2 and Python 3.
    """
    lindex = tostrip.rfind(stripstr)
    if lindex < 0:
        return tostrip
    # nothing after the last occurrence can contain stripstr again, so
    # cutting it out equals the old "replace within the tail"
    return tostrip[0:lindex] + tostrip[lindex + len(stripstr):]

# diff --git a/volk/include/volk/volk_register.py b/volk/gen/volk_register.py
# index bc8f959af..75e5eeb87 100755..100644
# --- a/volk/include/volk/volk_register.py
# +++ b/volk/gen/volk_register.py
# @@ -1,50 +1,54 @@
#  #! /usr/bin/env python
#  import sys
# +import os
#  import re
#  import string
#  from xml.dom import minidom
#  from volk_regexp import *
# -from make_cpuid_x86_c import make_cpuid_x86_c
# +from make_cpuid_c import make_cpuid_c
#  from make_cpuid_h import make_cpuid_h
# -from make_proccpu_sim import make_proccpu_sim
#  from make_set_simd import make_set_simd
# -from make_cpuid_generic_c import make_cpuid_generic_c
# -from make_cpuid_powerpc_c import make_cpuid_powerpc_c
# -from make_registry import make_registry
# -from make_h import make_h
# -from make_init_h import make_init_h
#  from make_config_fixed import make_config_fixed
# -from make_config_in import make_config_in
# -from make_c import make_c
# -from make_runtime_c import make_runtime_c
# -from make_init_c import make_init_c
# -from make_runtime import make_runtime
#  from make_typedefs import make_typedefs
# -from make_mktables import make_mktables
#  from make_environment_init_c import make_environment_init_c
#  from make_environment_init_h import make_environment_init_h
# -
# -outfile_set_simd = open("../../config/lv_set_simd_flags.m4", "w");
# -outfile_reg = open("volk_registry.h", "w");
# -outfile_h = open("volk.h", "w");
# -outfile_c = open("../../lib/volk.c",
"w"); -outfile_runtime = open("volk_runtime.h", "w"); -outfile_runtime_c = open("../../lib/volk_runtime.c", "w"); -outfile_typedefs = open("volk_typedefs.h", "w"); -outfile_init_h = open("../../lib/volk_init.h", "w"); -outfile_init_c = open("../../lib/volk_init.c", "w"); -outfile_cpu_h = open("volk_cpu.h", "w"); -outfile_cpu_x86_c = open("../../lib/volk_cpu_x86.c", "w"); -outfile_cpu_generic_c = open("../../lib/volk_cpu_generic.c", "w"); -outfile_cpu_powerpc_c = open("../../lib/volk_cpu_powerpc.c", "w"); -outfile_proccpu_sim = open("../../lib/volk_proccpu_sim.c", "w"); -outfile_config_in = open("../../volk_config.h.in", "w"); -outfile_config_fixed = open("volk_config_fixed.h", "w"); -outfile_mktables = open("../../lib/volk_mktables.c", "w"); -outfile_environment_c = open("../../lib/volk_environment_init.c", "w"); -outfile_environment_h = open("volk_environment_init.h", "w"); -infile = open("Makefile.am", "r"); +from make_makefile_am import make_makefile_am +from make_machines_h import make_machines_h +from make_machines_c import make_machines_c +from make_each_machine_c import make_each_machine_c +from make_c import make_c +from make_h import make_h +import copy + +#set srcdir and gendir +srcdir = os.path.dirname(os.path.dirname(__file__)) +try: gendir = sys.argv[1] +except: gendir = os.path.dirname(__file__) + +#ensure directories exist +for dir in ( + (os.path.join(gendir, 'include', 'volk')), + (os.path.join(gendir, 'lib')), + (os.path.join(gendir, 'config')) +): + if not os.path.exists(dir): os.makedirs(dir) + +outfile_set_simd = open(os.path.join(gendir, "config/lv_set_simd_flags.m4"), "w") +outfile_h = open(os.path.join(gendir, "include/volk/volk.h"), "w") +outfile_c = open(os.path.join(gendir, "lib/volk.c"), "w") +outfile_typedefs = open(os.path.join(gendir, "include/volk/volk_typedefs.h"), "w") +outfile_init_h = open(os.path.join(gendir, "lib/volk_init.h"), "w") +outfile_cpu_h = open(os.path.join(gendir, "include/volk/volk_cpu.h"), "w") +outfile_cpu_c = 
open(os.path.join(gendir, "lib/volk_cpu.c"), "w") +#outfile_config_in = open(os.path.join(gendir, "include/volk/volk_config.h.in"), "w") +outfile_config_fixed = open(os.path.join(gendir, "include/volk/volk_config_fixed.h"), "w") +outfile_environment_c = open(os.path.join(gendir, "lib/volk_environment_init.c"), "w") +outfile_environment_h = open(os.path.join(gendir, "lib/volk_environment_init.h"), "w") +outfile_makefile_am = open(os.path.join(gendir, "lib/Makefile.am"), "w") +outfile_machines_h = open(os.path.join(gendir, "lib/volk_machines.h"), "w") +outfile_machines_c = open(os.path.join(gendir, "lib/volk_machines.c"), "w") +infile = open(os.path.join(srcdir, "include/volk/Makefile.am"), "r") mfile = infile.readlines(); @@ -55,7 +59,7 @@ functions = []; for line in mfile: - subline = re.search(".*_(a16|u)\.h.*", line); + subline = re.search(".*_(a|u)\.h.*", line); if subline: subsubline = re.search("(?<=volk_).*", subline.group(0)); if subsubline: @@ -70,14 +74,14 @@ datatypes = set(datatypes); for line in mfile: for dt in datatypes: if dt in line: - subline = re.search("(volk_" + dt +"_.*(a16|u).*\.h)", line); + subline = re.search("(volk_" + dt +"_.*(a|u).*\.h)", line); if subline: subsubline = re.search(".+(?=\.h)", subline.group(0)); functions.append(subsubline.group(0)); archs = []; -afile = minidom.parse("archs.xml"); +afile = minidom.parse(os.path.join(srcdir, "gen/archs.xml")) filearchs = afile.getElementsByTagName("arch"); for filearch in filearchs: archs.append(str(filearch.attributes["name"].value)); @@ -86,8 +90,18 @@ for arch in archs: a_var = re.search("^\$", arch); if a_var: archs.remove(arch); + + +archflags_dict = {} +for filearch in filearchs: + archflags_dict[str(filearch.attributes["name"].value)] = str(filearch.getElementsByTagName("flag")[0].firstChild.data) +archalign_dict = {} +for filearch in filearchs: + alignelem = filearch.getElementsByTagName("alignment") + if(alignelem): + archalign_dict[str(filearch.attributes["name"].value)] = 
int(alignelem[0].firstChild.data) archs_or = "(" for arch in archs: @@ -95,7 +109,44 @@ for arch in archs: archs_or = archs_or[0:len(archs_or)-1]; archs_or = archs_or + ")"; - +#get machine list and parse to a list of machines, each with a list of archs (none of this DOM crap) +machine_str_dict = {} +mfile = minidom.parse(os.path.join(srcdir, "gen/machines.xml")) +filemachines = mfile.getElementsByTagName("machine") + +for filemachine in filemachines: + machine_str_dict[str(filemachine.attributes["name"].value)] = str(filemachine.getElementsByTagName("archs")[0].firstChild.data).split() + +#all right now you have a dict of arch lists +#next we expand it +#this is an expanded list accounting for the OR syntax +#TODO: make this work for multiple "|" machines +machines = {} +already_done = False +for machine_name in machine_str_dict: + already_done = False + marchlist = machine_str_dict[machine_name] + for march in marchlist: + or_marchs = march.split("|") + if len(or_marchs) > 1: + marchlist.remove(march) + for or_march in or_marchs: + tempmarchlist = copy.deepcopy(marchlist) + tempmarchlist.append(or_march) + machines[machine_name + "_" + or_march] = tempmarchlist + already_done = True + + if not already_done: + machines[machine_name] = marchlist + +#get the maximum alignment for all archs in a machine +machine_alignment_dict = {} +for machine in machines: + machine_alignment_dict[machine] = max((archalign_dict.get(k, 1)) for k in machines[machine]) + +#for machine in machine_alignment_dict: +# print machine + ": %d" % machine_alignment_dict[machine] + taglist = []; fcountlist = []; arched_arglist = []; @@ -105,13 +156,13 @@ my_argtypelist = []; for func in functions: tags = []; fcount = []; - infile_source = open(func + ".h"); + infile_source = open(os.path.join(srcdir, 'include', 'volk', func + ".h")) begun_name = 0; begun_paren = 0; sourcefile = infile_source.readlines(); infile_source.close(); for line in sourcefile: - +#FIXME: make it work for multiple #if 
define()s archline = re.search("^\#if.*?LV_HAVE_" + archs_or + ".*", line); if archline: arch = archline.group(0); @@ -219,60 +270,39 @@ for func in functions: fcountlist.append(fcount); taglist.append(tags); -outfile_mktables.write(make_mktables(functions)); -outfile_mktables.close(); - outfile_cpu_h.write(make_cpuid_h(filearchs)); outfile_cpu_h.close(); -outfile_cpu_x86_c.write(make_cpuid_x86_c(filearchs)); -outfile_cpu_x86_c.close(); - -outfile_proccpu_sim.write(make_proccpu_sim(filearchs)); -outfile_proccpu_sim.close(); +outfile_cpu_c.write(make_cpuid_c(filearchs)); +outfile_cpu_c.close(); -outfile_set_simd.write(make_set_simd(filearchs)); +outfile_set_simd.write(make_set_simd(filearchs, machines)); outfile_set_simd.close(); -outfile_cpu_generic_c.write(make_cpuid_generic_c(filearchs)); -outfile_cpu_generic_c.close(); - -outfile_cpu_powerpc_c.write(make_cpuid_powerpc_c(filearchs)); -outfile_cpu_powerpc_c.close(); - -outfile_config_in.write(make_config_in(filearchs)); -outfile_config_in.close(); - -outfile_reg.write(make_registry(filearchs, functions, fcountlist)); -outfile_reg.close(); - -outfile_h.write(make_h(functions, arched_arglist, retlist)); -outfile_h.close(); - -outfile_init_h.write(make_init_h(functions, arched_arglist, retlist)); -outfile_init_h.close(); - outfile_config_fixed.write(make_config_fixed(filearchs)); outfile_config_fixed.close(); -outfile_c.write( make_c(functions, taglist, arched_arglist, retlist, my_arglist, fcountlist)); -outfile_c.close(); +outfile_typedefs.write(make_typedefs(functions, retlist, my_argtypelist)); +outfile_typedefs.close(); -outfile_runtime_c.write(make_runtime_c(functions, taglist, arched_arglist, retlist, my_arglist, fcountlist)); -outfile_runtime_c.close(); +outfile_makefile_am.write(make_makefile_am(filearchs, machines, archflags_dict)) +outfile_makefile_am.close() -outfile_init_c.write(make_init_c(functions, filearchs)); -outfile_init_c.close(); +outfile_machines_h.write(make_machines_h(functions, machines, 
archs)) +outfile_machines_h.close() -outfile_runtime.write(make_runtime(functions)); -outfile_runtime.close(); +outfile_machines_c.write(make_machines_c(machines)) +outfile_machines_c.close() -outfile_typedefs.write(make_typedefs(functions, retlist, my_argtypelist)); -outfile_typedefs.close(); +outfile_c.write(make_c(machines, archs, functions, arched_arglist, my_arglist)) +outfile_c.close() -outfile_environment_c.write(make_environment_init_c(filearchs)); -outfile_environment_c.close(); +outfile_h.write(make_h(functions, arched_arglist)) +outfile_h.close() -outfile_environment_h.write(make_environment_init_h()); -outfile_environment_h.close(); +for machine in machines: + machine_c_filename = os.path.join(gendir, "lib/volk_machine_" + machine + ".c") + outfile_machine_c = open(machine_c_filename, "w") + outfile_machine_c.write(make_each_machine_c(machine, machines[machine], functions, fcountlist, taglist, machine_alignment_dict[machine])) + outfile_machine_c.close() diff --git a/volk/include/volk/.gitignore b/volk/include/volk/.gitignore index be8358f3a..b336cc7ce 100644 --- a/volk/include/volk/.gitignore +++ b/volk/include/volk/.gitignore @@ -1,20 +1,2 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in -/volk.h -/volk_config.h -/volk_config_fixed.h -/volk_cpu.h -/volk_environment_init.h -/volk_registry.h -/volk_runtime.h -/volk_tables.h -/volk_typedefs.h -/volk_mktables diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 7a5edd624..b7da9b37c 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -20,142 +20,104 @@ include $(top_srcdir)/Makefile.common -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ $(LV_CXXFLAGS) $(WITH_INCLUDES) volkincludedir = $(prefix)/include/volk -BUILT_SOURCES: \ - volk_config.h \ - volk_tables.h - volkinclude_HEADERS = \ volk_complex.h \ volk_common.h \ - volk_config_fixed.h \ - 
volk_runtime.h \ - volk_config.h \ - volk_tables.h \ - volk_typedefs.h \ - volk_registry.h \ - volk.h \ - volk_cpu.h \ - volk_environment_init.h \ - volk_16i_x5_add_quad_16i_x4_a16.h \ - volk_16i_branch_4_state_8_a16.h \ - volk_16ic_deinterleave_16i_x2_a16.h \ - volk_16ic_s32f_deinterleave_32f_x2_a16.h \ - volk_16ic_deinterleave_real_16i_a16.h \ - volk_16ic_s32f_deinterleave_real_32f_a16.h \ - volk_16ic_deinterleave_real_8i_a16.h \ - volk_16ic_magnitude_16i_a16.h \ - volk_16ic_s32f_magnitude_32f_a16.h \ - volk_16i_s32f_convert_32f_a16.h \ + volk_prefs.h \ + $(top_gendir)/include/volk/volk_config_fixed.h \ + $(top_gendir)/include/volk/volk_typedefs.h \ + $(top_gendir)/include/volk/volk.h \ + $(top_gendir)/include/volk/volk_cpu.h \ + volk_16i_x5_add_quad_16i_x4_a.h \ + volk_16i_branch_4_state_8_a.h \ + volk_16ic_deinterleave_16i_x2_a.h \ + volk_16ic_s32f_deinterleave_32f_x2_a.h \ + volk_16ic_deinterleave_real_16i_a.h \ + volk_16ic_s32f_deinterleave_real_32f_a.h \ + volk_16ic_deinterleave_real_8i_a.h \ + volk_16ic_magnitude_16i_a.h \ + volk_16ic_s32f_magnitude_32f_a.h \ + volk_16i_s32f_convert_32f_a.h \ volk_16i_s32f_convert_32f_u.h \ - volk_16i_convert_8i_a16.h \ + volk_16i_convert_8i_a.h \ volk_16i_convert_8i_u.h \ - volk_16i_max_star_16i_a16.h \ - volk_16i_max_star_horizontal_16i_a16.h \ - volk_16i_permute_and_scalar_add_a16.h \ - volk_16i_x4_quad_max_star_16i_a16.h \ - volk_16u_byteswap_a16.h \ - volk_32f_accumulator_s32f_a16.h \ - volk_32f_x2_add_32f_a16.h \ - volk_32fc_32f_multiply_32fc_a16.h \ - volk_32fc_s32f_power_32fc_a16.h \ - volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h \ - volk_32fc_s32f_atan2_32f_a16.h \ - volk_32fc_x2_conjugate_dot_prod_32fc_a16.h \ + volk_16i_max_star_16i_a.h \ + volk_16i_max_star_horizontal_16i_a.h \ + volk_16i_permute_and_scalar_add_a.h \ + volk_16i_x4_quad_max_star_16i_a.h \ + volk_16u_byteswap_a.h \ + volk_32f_accumulator_s32f_a.h \ + volk_32f_x2_add_32f_a.h \ + volk_32fc_32f_multiply_32fc_a.h \ + volk_32fc_s32f_power_32fc_a.h 
\ + volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \ + volk_32fc_s32f_atan2_32f_a.h \ + volk_32fc_x2_conjugate_dot_prod_32fc_a.h \ volk_32fc_x2_conjugate_dot_prod_32fc_u.h \ - volk_32fc_deinterleave_32f_x2_a16.h \ - volk_32fc_deinterleave_64f_x2_a16.h \ - volk_32fc_s32f_deinterleave_real_16i_a16.h \ - volk_32fc_deinterleave_real_32f_a16.h \ - volk_32fc_deinterleave_real_64f_a16.h \ - volk_32fc_x2_dot_prod_32fc_a16.h \ - volk_32fc_index_max_16u_a16.h \ - volk_32fc_s32f_magnitude_16i_a16.h \ - volk_32fc_magnitude_32f_a16.h \ - volk_32fc_x2_multiply_32fc_a16.h \ - volk_32f_s32f_convert_16i_a16.h \ + volk_32fc_deinterleave_32f_x2_a.h \ + volk_32fc_deinterleave_64f_x2_a.h \ + volk_32fc_s32f_deinterleave_real_16i_a.h \ + volk_32fc_deinterleave_real_32f_a.h \ + volk_32fc_deinterleave_real_64f_a.h \ + volk_32fc_x2_dot_prod_32fc_a.h \ + volk_32fc_index_max_16u_a.h \ + volk_32fc_s32f_magnitude_16i_a.h \ + volk_32fc_magnitude_32f_a.h \ + volk_32fc_x2_multiply_32fc_a.h \ + volk_32f_s32f_convert_16i_a.h \ volk_32f_s32f_convert_16i_u.h \ - volk_32f_s32f_convert_32i_a16.h \ + volk_32f_s32f_convert_32i_a.h \ volk_32f_s32f_convert_32i_u.h \ - volk_32f_convert_64f_a16.h \ + volk_32f_convert_64f_a.h \ volk_32f_convert_64f_u.h \ - volk_32f_s32f_convert_8i_a16.h \ + volk_32f_s32f_convert_8i_a.h \ volk_32f_s32f_convert_8i_u.h \ - volk_32fc_s32f_x2_power_spectral_density_32f_a16.h \ - volk_32fc_s32f_power_spectrum_32f_a16.h \ - volk_32fc_x2_square_dist_32f_a16.h \ - volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h \ - volk_32f_x2_divide_32f_a16.h \ - volk_32f_x2_dot_prod_32f_a16.h \ + volk_32fc_s32f_x2_power_spectral_density_32f_a.h \ + volk_32fc_s32f_power_spectrum_32f_a.h \ + volk_32fc_x2_square_dist_32f_a.h \ + volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h \ + volk_32f_x2_divide_32f_a.h \ + volk_32f_x2_dot_prod_32f_a.h \ volk_32f_x2_dot_prod_32f_u.h \ - volk_32f_s32f_32f_fm_detect_32f_a16.h \ - volk_32f_index_max_16u_a16.h \ - volk_32f_x2_s32f_interleave_16ic_a16.h \ - 
volk_32f_x2_interleave_32fc_a16.h \ - volk_32f_x2_max_32f_a16.h \ - volk_32f_x2_min_32f_a16.h \ - volk_32f_x2_multiply_32f_a16.h \ - volk_32f_s32f_normalize_a16.h \ - volk_32f_s32f_power_32f_a16.h \ - volk_32f_sqrt_32f_a16.h \ - volk_32f_s32f_stddev_32f_a16.h \ - volk_32f_stddev_and_mean_32f_x2_a16.h \ - volk_32f_x2_subtract_32f_a16.h \ - volk_32f_x3_sum_of_poly_32f_a16.h \ - volk_32i_x2_and_32i_a16.h \ - volk_32i_s32f_convert_32f_a16.h \ + volk_32f_s32f_32f_fm_detect_32f_a.h \ + volk_32f_index_max_16u_a.h \ + volk_32f_x2_s32f_interleave_16ic_a.h \ + volk_32f_x2_interleave_32fc_a.h \ + volk_32f_x2_max_32f_a.h \ + volk_32f_x2_min_32f_a.h \ + volk_32f_x2_multiply_32f_a.h \ + volk_32f_s32f_normalize_a.h \ + volk_32f_s32f_power_32f_a.h \ + volk_32f_sqrt_32f_a.h \ + volk_32f_s32f_stddev_32f_a.h \ + volk_32f_stddev_and_mean_32f_x2_a.h \ + volk_32f_x2_subtract_32f_a.h \ + volk_32f_x3_sum_of_poly_32f_a.h \ + volk_32i_x2_and_32i_a.h \ + volk_32i_s32f_convert_32f_a.h \ volk_32i_s32f_convert_32f_u.h \ - volk_32i_x2_or_32i_a16.h \ - volk_32u_byteswap_a16.h \ - volk_32u_popcnt_a16.h \ - volk_64f_convert_32f_a16.h \ + volk_32i_x2_or_32i_a.h \ + volk_32u_byteswap_a.h \ + volk_32u_popcnt_a.h \ + volk_64f_convert_32f_a.h \ volk_64f_convert_32f_u.h \ - volk_64f_x2_max_64f_a16.h \ - volk_64f_x2_min_64f_a16.h \ - volk_64u_byteswap_a16.h \ - volk_64u_popcnt_a16.h \ - volk_8ic_deinterleave_16i_x2_a16.h \ - volk_8ic_s32f_deinterleave_32f_x2_a16.h \ - volk_8ic_deinterleave_real_16i_a16.h \ - volk_8ic_s32f_deinterleave_real_32f_a16.h \ - volk_8ic_deinterleave_real_8i_a16.h \ - volk_8ic_x2_multiply_conjugate_16ic_a16.h \ - volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h \ - volk_8i_convert_16i_a16.h \ + volk_64f_x2_max_64f_a.h \ + volk_64f_x2_min_64f_a.h \ + volk_64u_byteswap_a.h \ + volk_64u_popcnt_a.h \ + volk_8ic_deinterleave_16i_x2_a.h \ + volk_8ic_s32f_deinterleave_32f_x2_a.h \ + volk_8ic_deinterleave_real_16i_a.h \ + volk_8ic_s32f_deinterleave_real_32f_a.h \ + 
volk_8ic_deinterleave_real_8i_a.h \ + volk_8ic_x2_multiply_conjugate_16ic_a.h \ + volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h \ + volk_8i_convert_16i_a.h \ volk_8i_convert_16i_u.h \ - volk_8i_s32f_convert_32f_a16.h \ + volk_8i_s32f_convert_32f_a.h \ volk_8i_s32f_convert_32f_u.h - -VOLK_MKTABLES_SOURCES = \ - $(platform_CODE) \ - $(top_srcdir)/lib/volk_rank_archs.c \ - $(top_srcdir)/lib/volk_mktables.c - - -volk_mktables$(EXEEXT): $(VOLK_MKTABLES_SOURCES) - $(CC) -o $@ $^ $(AM_CPPFLAGS) -I$(top_builddir)/include - -volk_tables.h: volk_mktables$(EXEEXT) - ./volk_mktables$(EXEEXT) - -volk_config.h: $(top_builddir)/volk_config.h - cp $^ $(top_builddir)/include/volk/$@ - -distclean-local: - rm -f volk_config_fixed.h - rm -f volk_config.h - rm -f volk_cpu.h - rm -f volk.h - rm -f volk_registry.h - rm -f volk_runtime.h - rm -f volk_typedefs.h - rm -f volk_tables.h - rm -f *.pyc - rm -f Makefile.in - rm -f volk_environment_init.h - rm -f volk_mktables - rm -f $(BUILT_SOURCES) diff --git a/volk/include/volk/emit_omnilog.py b/volk/include/volk/emit_omnilog.py deleted file mode 100644 index 309d7e578..000000000 --- a/volk/include/volk/emit_omnilog.py +++ /dev/null @@ -1,13 +0,0 @@ -def emit_prolog(): - tempstring = ""; - tempstring = tempstring + '#ifdef __cplusplus\n'; - tempstring = tempstring + 'extern "C" {\n'; - tempstring = tempstring + '#endif\n'; - return tempstring; -def emit_epilog(): - tempstring = ""; - tempstring = tempstring + '#ifdef __cplusplus\n'; - tempstring = tempstring + '}\n'; - tempstring = tempstring + '#endif\n'; - return tempstring; - diff --git a/volk/include/volk/make_c.py b/volk/include/volk/make_c.py deleted file mode 100644 index 6e75067d0..000000000 --- a/volk/include/volk/make_c.py +++ /dev/null @@ -1,73 +0,0 @@ -from xml.dom import minidom -import string -from volk_regexp import * - - -def make_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated 
by volk_register.py*/'; - tempstring = tempstring + '\n\n#include<volk/volk.h>\n'; - tempstring = tempstring + '#include<volk/volk_tables.h>\n'; - tempstring = tempstring + '#include<volk/volk_typedefs.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - tempstring = tempstring + '#include<string.h>\n'; - for func in funclist: - tempstring = tempstring + "#include<volk/" + func + ".h>\n" ; - tempstring = tempstring + '\n'; - - tempstring = tempstring + "static inline unsigned int volk_get_index(const char** indices, const char* arch, const int* arch_defs) {\n"; - tempstring = tempstring + " int i = 1;\n" - tempstring = tempstring + " for(;i<arch_defs[0];++i){\n" - tempstring = tempstring + " if (strcmp(arch, indices[i]) == 0) {\n" - tempstring = tempstring + " return i;\n" - tempstring = tempstring + " }\n" - tempstring = tempstring + " }\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n" - - for i in range(len(funclist)): - tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n " + funclist[i] + "_" + str(taglist[i][tags_counter]) + ",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + "static const char* " + funclist[i] + "_indices[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = 
tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n \"" + str(taglist[i][tags_counter]) + "\",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + retlist[i] + "inline " + funclist[i] + "_manual" + arched_arglist[i] + '\n'; - tempstring = tempstring + "return " + funclist[i] + "_archs[volk_get_index(" + funclist[i] + "_indices, arch, " + funclist[i] + "_arch_defs)](" + my_arglist[i] + ");" + "\n}\n"; - - tempstring = tempstring + retlist[i] + "inline " + funclist[i] + replace_arch.sub("", arched_arglist[i]) + '\n'; - - tempstring = tempstring + funclist[i] + "_archs[" + funclist[i] + "_func_table](" + my_arglist[i] + ");" + '\n'; - tempstring = tempstring + "}\n\n"; - - return tempstring; diff --git a/volk/include/volk/make_cpuid_generic_c.py b/volk/include/volk/make_cpuid_generic_c.py deleted file mode 100644 index c682d4138..000000000 --- a/volk/include/volk/make_cpuid_generic_c.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. 
-# - -from xml.dom import minidom - -def make_cpuid_generic_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - for domarch in dom: - if str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; diff --git a/volk/include/volk/make_cpuid_powerpc_c.py b/volk/include/volk/make_cpuid_powerpc_c.py deleted file mode 100644 index 0b0ea84e7..000000000 --- a/volk/include/volk/make_cpuid_powerpc_c.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. 
-# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. -# - -from xml.dom import minidom - -def make_cpuid_powerpc_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - #just assume it has them for powerpc - for domarch in dom: - if str(domarch.attributes["type"].value) == "powerpc": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - elif str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - 
tempstring = tempstring + " volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - - tempstring = tempstring + "}\n\n" - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; - diff --git a/volk/include/volk/make_cpuid_x86_c.py b/volk/include/volk/make_cpuid_x86_c.py deleted file mode 100644 index 2b2bd7c91..000000000 --- a/volk/include/volk/make_cpuid_x86_c.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. 
-# - -from xml.dom import minidom - -def make_cpuid_x86_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "#include <gcc_x86_cpuid.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - tempstring = tempstring + "#define cpuid_x86(op, r) __get_cpuid(op, r+0, r+1, r+2, r+3)\n\n" - tempstring = tempstring + "static inline unsigned int cpuid_eax(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[0];\n" - tempstring = tempstring + "}\n\n"; - - tempstring = tempstring + "static inline unsigned int cpuid_ebx(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[1];\n" - tempstring = tempstring + "}\n\n"; - - tempstring = tempstring + "static inline unsigned int cpuid_ecx(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[2];\n" - tempstring = tempstring + "}\n\n"; - - tempstring = tempstring + "static inline unsigned int cpuid_edx(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[3];\n" - tempstring = tempstring + "}\n\n"; - - for domarch in dom: - if str(domarch.attributes["type"].value) == "x86": - if "no_test" in domarch.attributes.keys(): - no_test = str(domarch.attributes["no_test"].value); - if no_test == "true": - no_test = True; - else: - no_test = False; - else: - no_test = False; - arch = str(domarch.attributes["name"].value); - op = 
domarch.getElementsByTagName("op"); - if op: - op = str(op[0].firstChild.data); - reg = domarch.getElementsByTagName("reg"); - if reg: - reg = str(reg[0].firstChild.data); - shift = domarch.getElementsByTagName("shift"); - if shift: - shift = str(shift[0].firstChild.data); - val = domarch.getElementsByTagName("val"); - if val: - val = str(val[0].firstChild.data); - - if no_test: - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - elif op == "1": - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " unsigned int e" + reg + "x = cpuid_e" + reg + "x (" + op + ");\n" - tempstring = tempstring + " return ((e" + reg + "x >> " + shift + ") & 1) == " + val + ";\n" - tempstring = tempstring + "}\n\n"; - - elif op == "0x80000001": - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " unsigned int extended_fct_count = cpuid_eax(0x80000000);\n"; - tempstring = tempstring + " if (extended_fct_count < 0x80000001)\n"; - tempstring = tempstring + " return "+ val + "^1;\n\n" - tempstring = tempstring + " unsigned int extended_features = cpuid_e" + reg + "x (" + op + ");\n"; - tempstring = tempstring + " return ((extended_features >> " + shift + ") & 1) == " + val + ";\n" - tempstring = tempstring + "}\n\n"; - elif str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " 
volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; - - - - - - - diff --git a/volk/include/volk/make_h.py b/volk/include/volk/make_h.py deleted file mode 100644 index 81d9ad401..000000000 --- a/volk/include/volk/make_h.py +++ /dev/null @@ -1,28 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -from volk_regexp import * - - - -def make_h(funclist, arched_arglist, retlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_H'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_H'; - tempstring = tempstring + '\n\n#include<inttypes.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - tempstring = tempstring + '#include<volk/volk_config.h>\n'; - tempstring = tempstring + '#include<volk/volk_config_fixed.h>\n'; - tempstring = tempstring + '#include<volk/volk_environment_init.h>\n' - tempstring = tempstring + emit_prolog() - tempstring = tempstring + '\n'; - - for i in range(len(retlist)): - tempstring = tempstring + retlist[i] + funclist[i] + replace_bracket.sub(";", replace_arch.sub("", arched_arglist[i])) + '\n'; - tempstring = tempstring + retlist[i] + funclist[i] + "_manual" + replace_bracket.sub(";", arched_arglist[i]) + '\n'; - - tempstring = tempstring + emit_epilog(); - - tempstring = tempstring + "#endif /*INCLUDED_VOLK_H*/\n"; - - return tempstring; diff --git a/volk/include/volk/make_init_c.py b/volk/include/volk/make_init_c.py 
deleted file mode 100644 index 330e19592..000000000 --- a/volk/include/volk/make_init_c.py +++ /dev/null @@ -1,42 +0,0 @@ -from xml.dom import minidom - -def make_init_c(funclist, dom) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - - tempstring = tempstring + '\n\n#include<volk/volk_runtime.h>\n'; - tempstring = tempstring + '#include<volk/volk_cpu.h>\n'; - tempstring = tempstring + '#include<volk_init.h>\n'; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - incs = domarch.getElementsByTagName("include"); - for inc in incs: - my_inc = str(inc.firstChild.data); - tempstring = tempstring + "#if LV_HAVE_" + arch.swapcase() + "\n"; - tempstring = tempstring + "#include<" + my_inc + ">\n"; - tempstring = tempstring + "#endif\n" - tempstring = tempstring + '\n\n'; - - tempstring = tempstring + "extern struct VOLK_RUNTIME volk_runtime;\n\n"; - tempstring = tempstring + "struct VOLK_RUNTIME* get_volk_runtime(){\n"; - tempstring = tempstring + " return &volk_runtime;\n"; - tempstring = tempstring + "}\n\n" - tempstring = tempstring + " void volk_runtime_init() {\nvolk_cpu_init();\n"; - - for func in funclist: - tempstring = tempstring + " volk_runtime." 
+ func + " = default_acquire_" + func + ";\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - envs = domarch.getElementsByTagName("environment"); - for env in envs: - cmd = str(env.firstChild.data); - tempstring = tempstring + " if(volk_cpu.has_" + arch + "()){\n"; - tempstring = tempstring + "#if LV_HAVE_" + arch.swapcase() + "\n"; - tempstring = tempstring + " " + cmd + "\n"; - tempstring = tempstring + "#endif\n" - tempstring = tempstring + " }\n"; - - tempstring = tempstring + "}\n"; - - return tempstring diff --git a/volk/include/volk/make_init_h.py b/volk/include/volk/make_init_h.py deleted file mode 100644 index 6dbe1c585..000000000 --- a/volk/include/volk/make_init_h.py +++ /dev/null @@ -1,26 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -from volk_regexp import * - - - -def make_init_h(funclist, arched_arglist, retlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_INIT_H'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_INIT_H'; - tempstring = tempstring + '\n\n#include<inttypes.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - - tempstring = tempstring + '\n'; - - tempstring = tempstring + emit_prolog(); - - for i in range(len(retlist)): - tempstring = tempstring + retlist[i] + " default_acquire_" + funclist[i] + replace_bracket.sub(";", replace_arch.sub("", arched_arglist[i])) + '\n'; - - tempstring= tempstring + emit_epilog(); - tempstring = tempstring + "#endif /*INCLUDED_VOLK_INIT_H*/\n"; - - return tempstring; diff --git a/volk/include/volk/make_mktables.py b/volk/include/volk/make_mktables.py deleted file mode 100644 index 051ac268d..000000000 --- a/volk/include/volk/make_mktables.py +++ /dev/null @@ -1,33 +0,0 @@ - - -def make_mktables(funclist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/\n'; - - tempstring 
= tempstring + '#include<stdio.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - tempstring = tempstring + '#include<volk_rank_archs.h>\n'; - tempstrgin = tempstring + '#include<volk/volk_cpu.h>\n'; - tempstring = tempstring + "\n\n"; - - tempstring = tempstring + 'int main() {\n'; - tempstring = tempstring + ' int i = 0;\n'; - tempstring = tempstring + ' FILE* output;\n'; - tempstring = tempstring + ' output = fopen("volk_tables.h", "w");\n'; - tempstring = tempstring + ' fprintf(output, "#ifndef INCLUDED_VOLK_TABLES_H\\n");\n'; - tempstring = tempstring + ' fprintf(output, "#define INCLUDED_VOLK_TABLES_H\\n\\n");\n'; - - for func in funclist: - tempstring = tempstring + ' fprintf(output, "static const ' + func + '_func_table = %u;\\n", volk_rank_archs(' + func + '_arch_defs, volk_get_lvarch()));\n'; - tempstring = tempstring + ' fprintf(output, "#endif /*INCLUDED_VOLK_TABLES_H*/\\n");\n'; - tempstring = tempstring + ' fclose(output);\n' - tempstring = tempstring + '}\n'; - return tempstring; - - - - - - - - diff --git a/volk/include/volk/make_registry.py b/volk/include/volk/make_registry.py deleted file mode 100644 index 8457d61f3..000000000 --- a/volk/include/volk/make_registry.py +++ /dev/null @@ -1,62 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -import string - -def make_registry(dom, funclist, fcountlist) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring +'\n#ifndef INCLUDED_VOLK_REGISTRY_H'; - tempstring = tempstring +'\n#define INCLUDED_VOLK_REGISTRY_H\n\n'; - tempstring = tempstring +'#include<volk/volk_config.h>\n'; - tempstring = tempstring +'#include<volk/volk_config_fixed.h>\n'; - tempstring = tempstring + emit_prolog(); - tempstring = tempstring + '\n' - - - - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring +"#if LV_HAVE_" + arch.swapcase() + "\n"; - tempstring = tempstring 
+"#define LV_" + arch.swapcase() + "_CNT 1\n"; - tempstring = tempstring +"#else\n"; - tempstring = tempstring +"#define LV_" + arch.swapcase() + "_CNT 0\n"; - tempstring = tempstring +"#endif /*LV_HAVE_" + arch.swapcase() + "*/\n\n"; - - counter = 0; - for fcount in fcountlist: - tempstring = tempstring + "static const int " + funclist[counter] + "_arch_defs[] = {\n"; - counter = counter + 1; - for arch_list in fcount: - tempstring = tempstring + " (LV_" - for ind in range(len(arch_list)): - tempstring = tempstring + arch_list[ind] + "_CNT"; - if ind < len(arch_list) - 1: - tempstring = tempstring + " * LV_"; - tempstring = tempstring + ") + "; - lindex = tempstring.rfind(" + "); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], " + ", ""); - tempstring = tempstring + ",\n" - for arch_list in fcount: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - tempstring = tempstring + "\n" - tempstring = tempstring + " (1 << LV_" - for ind in range(len(arch_list)): - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + ") + (1 << LV_" - tempstring = tempstring + "),\n#endif\n" - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n" - - - tempstring = tempstring + emit_epilog(); - tempstring = tempstring +"#endif /*INCLUDED_VOLK_REGISTRY_H*/\n"; - - return tempstring; - diff --git a/volk/include/volk/make_runtime.py b/volk/include/volk/make_runtime.py deleted file mode 100644 index 645b3aaee..000000000 --- a/volk/include/volk/make_runtime.py +++ /dev/null @@ -1,34 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -from volk_regexp import * - - - -def make_runtime(funclist) : - tempstring = ""; - tempstring = 
tempstring + '/*this file is auto generated by volk_register.py*/\n'; - - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_RUNTIME'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_RUNTIME'; - tempstring = tempstring + '\n\n#include<volk/volk_typedefs.h>\n'; - tempstring = tempstring + '#include<volk/volk_config.h>\n'; - tempstring = tempstring + '#include<volk/volk_config_fixed.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - tempstring = tempstring + emit_prolog(); - - tempstring = tempstring + '\n'; - - tempstring = tempstring + "struct VOLK_RUNTIME {\n"; - - for i in range(len(funclist)): - tempstring = tempstring + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + ";\n"; - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + "struct VOLK_RUNTIME* get_volk_runtime();\n\n" - tempstring = tempstring + "\nvoid volk_runtime_init();\n"; - - tempstring = tempstring + emit_epilog(); - tempstring = tempstring + "#endif /*INCLUDED_VOLK_RUNTIME*/\n"; - - return tempstring; - diff --git a/volk/include/volk/make_runtime_c.py b/volk/include/volk/make_runtime_c.py deleted file mode 100644 index 070df9ba7..000000000 --- a/volk/include/volk/make_runtime_c.py +++ /dev/null @@ -1,47 +0,0 @@ -from xml.dom import minidom -import string -from volk_regexp import * - - -def make_runtime_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - - - tempstring = tempstring + '\n\n#include<volk/volk_runtime.h>\n'; - tempstring = tempstring + '#include<volk/volk_config.h>\n'; - tempstring = tempstring + "#include<volk/volk_config_fixed.h>\n"; - tempstring = tempstring + '#include<volk/volk_cpu.h>\n'; - tempstring = tempstring + '#include<volk_init.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - - for func in funclist: - tempstring = tempstring + "#include<volk/" + func + ".h>\n" ; - tempstring = 
tempstring + '\n'; - - tempstring = tempstring + "struct VOLK_RUNTIME volk_runtime;\n"; - - for i in range(len(funclist)): - tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n " + funclist[i] + "_" + str(taglist[i][tags_counter]) + ",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - - tempstring = tempstring + retlist[i] + "default_acquire_" + funclist[i] + replace_arch.sub("", arched_arglist[i]) + '\n'; - tempstring = tempstring + "volk_runtime." + funclist[i] + " = " + funclist[i] + "_archs[volk_rank_archs(" + funclist[i] + "_arch_defs, volk_get_lvarch())];\n" + "return " + funclist[i] + "_archs[volk_rank_archs(" + funclist[i] + "_arch_defs, volk_get_lvarch())](" + my_arglist[i] + ");" + '\n}\n'; - - return tempstring; diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py deleted file mode 100644 index c74b0464d..000000000 --- a/volk/include/volk/make_set_simd.py +++ /dev/null @@ -1,272 +0,0 @@ -# -# Copyright 2010 Free Software Foundation, Inc. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# - -from xml.dom import minidom - -def make_set_simd(dom) : - tempstring = ""; - tempstring = tempstring +'dnl this file is auto generated by volk_register.py\n\n'; - - tempstring = tempstring + "AC_DEFUN([_MAKE_FAKE_PROCCPU],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; - tempstring = tempstring + " AC_MSG_CHECKING([proccpu])\n"; - tempstring = tempstring + " case \"$MD_CPU\" in\n"; - tempstring = tempstring + " (x86)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ -I$srcdir/lib $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_x86.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " (powerpc)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_powerpc.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " (*)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ 
$srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_generic.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " esac\n"; - tempstring = tempstring + "])\n" - - for domarch in dom: - if str(domarch.attributes["type"].value) != "all": - arch = str(domarch.attributes["name"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - tempstring = tempstring + "AC_DEFUN([_TRY_ADD_" + arch.swapcase() + "],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " LF_CHECK_CC_FLAG([-" + flag + "])\n"; - tempstring = tempstring + " LF_CHECK_CXX_FLAG([-" + flag + "])\n"; - tempstring = tempstring + "])\n"; - - tempstring = tempstring + "AC_DEFUN([LV_SET_SIMD_FLAGS],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; - tempstring = tempstring + " AC_SUBST(LV_CXXFLAGS)\n"; - tempstring = tempstring + " indCC=no\n"; - tempstring = tempstring + " indCXX=no\n"; - tempstring = tempstring + " indLV_ARCH=no\n"; - tempstring = tempstring + " AC_ARG_WITH(lv_arch,\n"; - tempstring = tempstring + " AC_HELP_STRING([--with-lv_arch=ARCH],[set volk hardware speedups as space separated string with elements from the following list("; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + arch + ", " - tempstring = tempstring[0:len(tempstring) - 2]; - - tempstring = tempstring + ")]),\n"; - tempstring = tempstring + " [cf_with_lv_arch=\"$withval\"],\n"; - tempstring = tempstring + " [cf_with_lv_arch=\"\"])\n"; - if str(domarch.attributes["type"].value) == 
"all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; - tempstring = tempstring + " ADDONS=\"\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"\"\n"; - tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; - tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; - tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; - tempstring = tempstring + " cf_with_lv_arch=$lv_PROCCPU\n"; - tempstring = tempstring + " OVERRULE_FLAG=\"yes\"\n"; - - tempstring = tempstring + " fi\n"; - for domarch in dom: - if str(domarch.attributes["type"].value) != "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=no\n"; - - tempstring = tempstring + " case \"$MD_CPU\" in\n"; - tempstring = tempstring + " (x86)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - if atype == "x86": - tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - overrule = domarch.getElementsByTagName("overrule"); - if overrule: - overrule = str(overrule[0].firstChild.data); - else: - overrule = ""; - overrule_val = domarch.getElementsByTagName("overrule_val"); - if overrule_val: - overrule_val = str(overrule_val[0].firstChild.data); - else: - overrule_val = ""; - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "x86": - tempstring = tempstring + " for i in $lf_CXXFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCXX=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $lf_CFLAGS\n" - tempstring = tempstring + " do\n" - 
tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCC=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - - tempstring = tempstring + " if test \"$indCC\" == \"yes\" && test \"$indCXX\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indCC=no\n" - tempstring = tempstring + " indCXX=no\n" - tempstring = tempstring + " indLV_ARCH=no\n" - elif atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - 
tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - - tempstring = tempstring + " ;;\n" - - tempstring = tempstring + " (powerpc)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - if atype == "powerpc": - tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "powerpc": - tempstring = tempstring + " for i in $lf_CXXFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCXX=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $lf_CFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCC=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test \"$indCC\" = yes && test \"indCXX\" = yes && \"indLV_ARCH\" = yes; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag 
set])\n"; - tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indCC=no\n" - tempstring = tempstring + " indCXX=no\n" - tempstring = tempstring + " indLV_ARCH=no\n" - elif atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " ;;\n" - tempstring = tempstring + " (*)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - 
tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " ;;\n" - tempstring = tempstring + " esac\n" - tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" - tempstring = tempstring + "])\n" - - return tempstring; - - diff --git a/volk/include/volk/volk_16i_branch_4_state_8_a16.h b/volk/include/volk/volk_16i_branch_4_state_8_a.h index 3437c1a6b..0424e66e9 100644 --- a/volk/include/volk/volk_16i_branch_4_state_8_a16.h +++ b/volk/include/volk/volk_16i_branch_4_state_8_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_branch_4_state_8_a16_H -#define INCLUDED_volk_16i_branch_4_state_8_a16_H +#ifndef INCLUDED_volk_16i_branch_4_state_8_a_H +#define INCLUDED_volk_16i_branch_4_state_8_a_H #include<inttypes.h> @@ -8,13 +8,13 @@ -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +static inline void volk_16i_branch_4_state_8_a_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11; @@ -137,8 +137,8 @@ static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* s #endif 
/*LV_HAVE_SSEs*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_branch_4_state_8_a_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { int i = 0; int bound = 4; @@ -191,4 +191,4 @@ static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_branch_4_state_8_a16_H*/ +#endif /*INCLUDED_volk_16i_branch_4_state_8_a_H*/ diff --git a/volk/include/volk/volk_16i_convert_8i_a16.h b/volk/include/volk/volk_16i_convert_8i_a.h index 73e45ad63..8046035c7 100644 --- a/volk/include/volk/volk_16i_convert_8i_a16.h +++ b/volk/include/volk/volk_16i_convert_8i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16i_convert_8i_a16_H -#define INCLUDED_volk_16i_convert_8i_a16_H +#ifndef INCLUDED_volk_16i_convert_8i_a_H +#define INCLUDED_volk_16i_convert_8i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the input 16 bit integer data into 8 bit integer data @@ -12,7 +12,7 @@ \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -52,7 +52,7 @@ static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int1 \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -66,4 +66,4 @@ static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const i -#endif /* INCLUDED_volk_16i_convert_8i_a16_H */ +#endif /* INCLUDED_volk_16i_convert_8i_a_H */ diff --git a/volk/include/volk/volk_16i_convert_8i_u.h b/volk/include/volk/volk_16i_convert_8i_u.h index 5fc792b56..df1084fe0 100644 --- a/volk/include/volk/volk_16i_convert_8i_u.h +++ b/volk/include/volk/volk_16i_convert_8i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the input 16 bit integer data into 8 bit integer data diff --git a/volk/include/volk/volk_16i_max_star_16i_a16.h b/volk/include/volk/volk_16i_max_star_16i_a.h index ff57bd2a1..6a4f63708 100644 --- a/volk/include/volk/volk_16i_max_star_16i_a16.h +++ b/volk/include/volk/volk_16i_max_star_16i_a.h @@ -1,18 +1,18 @@ -#ifndef INCLUDED_volk_16i_max_star_16i_a16_H -#define INCLUDED_volk_16i_max_star_16i_a16_H +#ifndef INCLUDED_volk_16i_max_star_16i_a_H +#define INCLUDED_volk_16i_max_star_16i_a_H #include<inttypes.h> #include<stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes) { @@ -85,9 +85,9 @@ static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, #endif /*LV_HAVE_SSSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes) { int i = 0; @@ -105,4 +105,4 @@ static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_max_star_16i_a16_H*/ +#endif /*INCLUDED_volk_16i_max_star_16i_a_H*/ diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h index 695e08dbf..f60b33a41 100644 --- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h +++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h @@ -1,18 +1,18 @@ -#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a16_H -#define INCLUDED_volk_16i_max_star_horizontal_16i_a16_H +#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H +#define 
INCLUDED_volk_16i_max_star_horizontal_16i_a_H #include<inttypes.h> #include<stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d}; @@ -109,8 +109,8 @@ static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, #endif /*LV_HAVE_SSSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_max_star_horizontal_16i_a_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { int i = 0; @@ -127,4 +127,4 @@ static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a16_H*/ +#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a_H*/ diff --git a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h index e52a949fb..de36cee80 100644 --- a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h +++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a16_H -#define INCLUDED_volk_16i_permute_and_scalar_add_a16_H +#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a_H +#define INCLUDED_volk_16i_permute_and_scalar_add_a_H #include<inttypes.h> @@ -8,12 +8,12 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<xmmintrin.h> 
#include<emmintrin.h> -static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +static inline void volk_16i_permute_and_scalar_add_a_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -116,8 +116,8 @@ static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, sho #endif /*LV_HAVE_SSEs*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_permute_and_scalar_add_a_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { int i = 0; @@ -136,4 +136,4 @@ static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, sh #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a16_H*/ +#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a_H*/ diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a.h index 83fd26ff9..0555fdf00 100644 --- a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16i_s32f_convert_32f_a16_H -#define INCLUDED_volk_16i_s32f_convert_32f_a16_H +#ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H +#define INCLUDED_volk_16i_s32f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
@@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -58,7 +58,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, con } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! @@ -68,7 +68,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, con \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -94,7 +94,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 16 bit input data buffer @@ -102,7 +102,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -116,4 +116,4 @@ static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, co -#endif /* INCLUDED_volk_16i_s32f_convert_32f_a16_H */ +#endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */ diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_u.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h index 8f0dd0083..d34acc091 100644 --- a/volk/include/volk/volk_16i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -59,7 +59,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! @@ -96,7 +96,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const in } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 16 bit input data buffer diff --git a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h index e4ec5ab4e..2688aff04 100644 --- a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h +++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H -#define INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H +#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a_H +#define INCLUDED_volk_16i_x4_quad_max_star_16i_a_H #include<inttypes.h> @@ -9,11 +9,11 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<emmintrin.h> -static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +static inline void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { @@ -96,9 +96,9 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* /*asm volatile ( - "volk_16i_x4_quad_max_star_16i_a16_sse2_L1:\n\t" + "volk_16i_x4_quad_max_star_16i_a_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je volk_16i_x4_quad_max_star_16i_a16_sse2_END\n\t" + "je volk_16i_x4_quad_max_star_16i_a_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" @@ -143,9 +143,9 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* "movaps %%xmm1, (%[target])\n\t" "addw $16, %[target]\n\t" - "jmp volk_16i_x4_quad_max_star_16i_a16_sse2_L1\n\t" + "jmp volk_16i_x4_quad_max_star_16i_a_sse2_L1\n\t" - "volk_16i_x4_quad_max_star_16i_a16_sse2_END:\n\t" + "volk_16i_x4_quad_max_star_16i_a_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target) : @@ -167,8 +167,8 
@@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* #endif /*LV_HAVE_SSE2*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_x4_quad_max_star_16i_a_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { int i = 0; @@ -188,4 +188,4 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, shor #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H*/ +#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a_H*/ diff --git a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h index 5744ca3a6..e4c9f17ed 100644 --- a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h +++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H -#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H +#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H +#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H #include<inttypes.h> @@ -9,11 +9,11 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4; __m128i *p_target0, *p_target1, *p_target2, *p_target3, *p_src0, *p_src1, *p_src2, *p_src3, *p_src4; @@ -65,9 +65,9 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* } /*asm volatile ( - 
".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1:\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END\n\t" + "je .%=volk_16i_x5_add_quad_16i_x4_a_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" "movaps (%[src2]), %%xmm3\n\t" @@ -91,8 +91,8 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* "add $16, %[target1]\n\t" "add $16, %[target2]\n\t" "add $16, %[target3]\n\t" - "jmp .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1\n\t" - ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END:\n\t" + "jmp .%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3) :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5" @@ -111,9 +111,9 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* #endif /*LV_HAVE_SSE2*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { int i = 0; @@ -133,4 +133,4 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short -#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H*/ +#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H*/ diff --git a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h index 7e08bf182..cdd60235e 100644 --- a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h +++ 
b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H -#define INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H +#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H +#define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -52,7 +52,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into I & Q vector data @@ -61,7 +61,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -120,7 +120,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int1 } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data \param complexVector The complex input vector @@ -128,7 +128,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int1 \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -140,7 +140,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, i } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Deinterleaves the complex 16 bit vector into I & Q vector data \param complexVector The complex input vector @@ -148,11 +148,11 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, i \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -extern void volk_16ic_deinterleave_16i_x2_a16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); -static inline void volk_16ic_deinterleave_16i_x2_a16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ - volk_16ic_deinterleave_16i_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, num_points); +extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_16i_x2_a_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a_H */ diff --git a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h index 388c00592..2b99e068e 100644 --- a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a16_H -#define INCLUDED_volk_16ic_deinterleave_real_16i_a16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H +#define INCLUDED_volk_16ic_deinterleave_real_16i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -47,7 +47,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, c #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I vector data @@ -55,7 +55,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, c \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -96,14 +96,14 @@ static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, co } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 16 bit vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -117,4 +117,4 @@ static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, -#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a_H */ diff --git a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a.h index 55a25702e..cd2fabb52 100644 --- a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a16_H -#define INCLUDED_volk_16ic_deinterleave_real_8i_a16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H +#define INCLUDED_volk_16ic_deinterleave_real_8i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into 8 bit I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -59,14 +59,14 @@ static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, con } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; int16_t* complexVectorPtr = (int16_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -77,18 +77,18 @@ static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Deinterleaves the complex 16 bit vector into 8 bit I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -extern void volk_16ic_deinterleave_real_8i_a16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); -static inline void volk_16ic_deinterleave_real_8i_a16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ - volk_16ic_deinterleave_real_8i_a16_orc_impl(iBuffer, complexVector, num_points); +extern void volk_16ic_deinterleave_real_8i_a_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_real_8i_a_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a_H */ diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a16.h b/volk/include/volk/volk_16ic_magnitude_16i_a.h index bdcace750..a6951e967 100644 --- a/volk/include/volk/volk_16ic_magnitude_16i_a16.h +++ b/volk/include/volk/volk_16ic_magnitude_16i_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H -#define INCLUDED_volk_16ic_magnitude_16i_a16_H +#ifndef INCLUDED_volk_16ic_magnitude_16i_a_H +#define INCLUDED_volk_16ic_magnitude_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -13,7 +14,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,8 +26,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -76,7 +77,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -84,7 +85,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -96,8 +97,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float inputFloatBuffer[4] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -153,14 +154,14 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -173,18 +174,18 @@ static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC_DISABLED +#ifdef LV_HAVE_ORC_DISABLED /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); -static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ - volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); +extern void volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); +static inline void volk_16ic_magnitude_16i_a_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ + 
volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */ +#endif /* INCLUDED_volk_16ic_magnitude_16i_a_H */ diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h index 606de2fc5..e73d405e0 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H -#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H +#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data @@ -14,7 +15,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl __m128 invScalar = _mm_set_ps1(1.0/scalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ @@ -68,7 +69,7 @@ static inline void 
volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data \param complexVector The complex input vector @@ -77,7 +78,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -89,7 +90,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data \param complexVector The complex input vector @@ -98,11 +99,11 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -extern void volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); -static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ - volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); +extern void volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_deinterleave_32f_x2_a_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H */ diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h index 62331e496..1630cb0ed 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H -#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H +#define 
INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I float vector data @@ -13,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -52,7 +53,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffe } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I float vector data @@ -61,7 +62,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffe \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -72,7 +73,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, __m128 invScalar = _mm_set_ps1(iScalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < 
quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; @@ -99,7 +100,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into I float vector data \param complexVector The complex input vector @@ -107,7 +108,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; @@ -122,4 +123,4 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuff -#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */ diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h index ae64efbeb..35406e2cb 100644 --- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H -#define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H +#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H +#define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -14,7 +15,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ @@ -70,7 +71,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -79,7 +80,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -91,7 +92,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result, re, im; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ inputFloatBuffer[0] = (float)(complexVectorPtr[0]); @@ -140,7 +141,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -148,7 +149,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -161,7 +162,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVect } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC_DISABLED +#ifdef LV_HAVE_ORC_DISABLED /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -169,11 +170,11 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVect \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_16ic_s32f_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); -static inline void volk_16ic_s32f_magnitude_32f_a16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ - volk_16ic_s32f_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_magnitude_32f_a_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H */ +#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */ diff --git a/volk/include/volk/volk_16u_byteswap_a16.h b/volk/include/volk/volk_16u_byteswap_a.h index c8128dbab..75c7ef0f3 100644 --- a/volk/include/volk/volk_16u_byteswap_a16.h +++ b/volk/include/volk/volk_16u_byteswap_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16u_byteswap_a16_H -#define INCLUDED_volk_16u_byteswap_a16_H +#ifndef INCLUDED_volk_16u_byteswap_a_H +#define INCLUDED_volk_16u_byteswap_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
@@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint16_t* inputPtr = intsToSwap; __m128i input, left, right, output; @@ -43,13 +43,13 @@ static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int16_t's. \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points){ unsigned int point; uint16_t* inputPtr = intsToSwap; for(point = 0; point < num_points; point++){ @@ -61,17 +61,17 @@ static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Byteswaps (in-place) an aligned vector of int16_t's. 
\param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -extern void volk_16u_byteswap_a16_orc_impl(uint16_t* intsToSwap, unsigned int num_points); -static inline void volk_16u_byteswap_a16_orc(uint16_t* intsToSwap, unsigned int num_points){ - volk_16u_byteswap_a16_orc_impl(intsToSwap, num_points); +extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points); +static inline void volk_16u_byteswap_a_orc(uint16_t* intsToSwap, unsigned int num_points){ + volk_16u_byteswap_a_orc_impl(intsToSwap, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16u_byteswap_a16_H */ +#endif /* INCLUDED_volk_16u_byteswap_a_H */ diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a16.h b/volk/include/volk/volk_32f_accumulator_s32f_a.h index 4a3588e6d..7ce0d1c80 100644 --- a/volk/include/volk/volk_32f_accumulator_s32f_a16.h +++ b/volk/include/volk/volk_32f_accumulator_s32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H -#define INCLUDED_volk_32f_accumulator_s32f_a16_H +#ifndef INCLUDED_volk_32f_accumulator_s32f_a_H +#define INCLUDED_volk_32f_accumulator_s32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Accumulates the values in the input buffer @@ -12,13 +13,13 @@ \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float tempBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float tempBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); @@ -42,14 +43,14 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Accumulates the values in the input buffer \param result The accumulated result \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a_generic(float* result, const float* inputBuffer, unsigned int num_points){ const float* aPtr = inputBuffer; unsigned int number = 0; float returnValue = 0; @@ -64,4 +65,4 @@ static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const fl -#endif /* INCLUDED_volk_32f_accumulator_s32f_a16_H */ +#endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_a16.h b/volk/include/volk/volk_32f_convert_64f_a.h index c303dc118..dda646409 100644 --- a/volk/include/volk/volk_32f_convert_64f_a16.h +++ b/volk/include/volk/volk_32f_convert_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_convert_64f_a16_H -#define 
INCLUDED_volk_32f_convert_64f_a16_H +#ifndef INCLUDED_volk_32f_convert_64f_a_H +#define INCLUDED_volk_32f_convert_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the float values into double values @@ -12,7 +12,7 @@ \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const flo \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a_generic(double* outputVector, const float* inputVector, unsigned int num_points){ double* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const -#endif /* INCLUDED_volk_32f_convert_64f_a16_H */ +#endif /* INCLUDED_volk_32f_convert_64f_a_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_u.h b/volk/include/volk/volk_32f_convert_64f_u.h index a825767de..387baa3b9 100644 --- a/volk/include/volk/volk_32f_convert_64f_u.h +++ b/volk/include/volk/volk_32f_convert_64f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the float values into double values diff --git a/volk/include/volk/volk_32f_index_max_16u_a16.h b/volk/include/volk/volk_32f_index_max_16u_a.h index d070e17d5..3e0cf1d65 100644 --- a/volk/include/volk/volk_32f_index_max_16u_a16.h +++ b/volk/include/volk/volk_32f_index_max_16u_a.h @@ -1,14 +1,15 @@ -#ifndef INCLUDED_volk_32f_index_max_16u_a16_H -#define INCLUDED_volk_32f_index_max_16u_a16_H +#ifndef INCLUDED_volk_32f_index_max_16u_a_H +#define INCLUDED_volk_32f_index_max_16u_a_H #include <volk/volk_common.h> +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include<smmintrin.h> -static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,8 +26,8 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -63,10 +64,10 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const #endif /*LV_HAVE_SSE4_1*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include<xmmintrin.h> -static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -83,8 +84,8 @@ static inline void 
volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -122,8 +123,8 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_GENERIC -static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32f_index_max_16u_a_generic(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ float max = src0[0]; unsigned int index = 0; @@ -145,4 +146,4 @@ static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, cons #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/ +#endif /*INCLUDED_volk_32f_index_max_16u_a_H*/ diff --git a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h index ff4d5b19c..b25df75a1 100644 --- a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H -#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H +#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector. @@ -14,7 +14,7 @@ \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. 
\param num_noints The number of real values in the input vector. */ -static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -78,7 +78,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector. \param outputVector The byte-aligned vector where the results will be stored. @@ -87,7 +87,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. \param num_points The number of real values in the input vector. 
*/ -static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -117,4 +117,4 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVect -#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h index 168245d65..b1902a8c0 100644 --- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H -#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H +#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the spectral noise floor of an input power spectrum @@ -16,12 +17,12 @@ \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; const float* dataPointsPtr = realDataPoints; - float avgPointsVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4]; __m128 dataPointsVal; __m128 avgPointsVal = _mm_setzero_ps(); @@ -87,7 +88,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no sumMean += avgPointsVector[3]; // Calculate the number of valid bins from the remaning count - float validBinCountVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4]; _mm_store_ps(validBinCountVector, vValidBinCount); float validBinCount = 0; @@ -116,7 +117,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the spectral noise floor of an input power spectrum @@ -127,7 +128,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ float sumMean = 0.0; unsigned int number; // find the sum (for mean), etc @@ -164,4 +165,4 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float -#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h index d6b16e336..0a2b4f0f2 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H -#define INCLUDED_volk_32f_s32f_convert_16i_a16_H +#ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H +#define INCLUDED_volk_32f_s32f_convert_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -13,7 +14,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -44,7 +45,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, con } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -53,7 +54,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -63,7 +64,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); @@ -93,7 +94,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* 
outputVector, cons \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -107,4 +108,4 @@ static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, -#endif /* INCLUDED_volk_32f_s32f_convert_16i_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index 4d306e53c..dec3f1611 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -45,7 +45,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -65,7 +65,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h index ae874fd7b..aa370e614 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h @@ -1,10 +1,47 @@ -#ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H -#define INCLUDED_volk_32f_s32f_convert_32i_a16_H +#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H +#define INCLUDED_volk_32f_s32f_convert_32i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_AVX +#include <immintrin.h> + /*! 
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value + \param inputVector The floating point input data buffer + \param outputVector The 32 bit output data buffer + \param scalar The value multiplied against each point in the input buffer + \param num_points The number of data values to be converted + */ +static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + + const unsigned int eighthPoints = num_points / 8; + + const float* inputVectorPtr = (const float*)inputVector; + int32_t* outputVectorPtr = outputVector; + __m256 vScalar = _mm256_set1_ps(scalar); + __m256 inputVal1; + __m256i intInputVal1; + + for(;number < eighthPoints; number++){ + inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8; + + intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar)); + + _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1); + outputVectorPtr += 8; + } + + number = eighthPoints * 8; + for(; number < num_points; number++){ + outputVector[number] = (int32_t)(inputVector[number] * scalar); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -13,7 +50,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -40,7 +77,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, con } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -49,7 +86,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -59,7 +96,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); @@ -89,7 +126,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* 
outputVector, cons \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -103,4 +140,4 @@ static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, -#endif /* INCLUDED_volk_32f_s32f_convert_32i_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index 561fcd800..b4e954dc4 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -41,7 +41,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -61,7 +61,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h index f64f2a213..8d87a07d7 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H -#define INCLUDED_volk_32f_s32f_convert_8i_a16_H +#ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H +#define INCLUDED_volk_32f_s32f_convert_8i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -13,7 +14,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -51,7 +52,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -60,7 +61,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -70,7 +71,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); @@ -100,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -114,4 +115,4 @@ static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, co -#endif /* INCLUDED_volk_32f_s32f_convert_8i_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h 
b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index 420693571..1c6bf87c9 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -52,7 +52,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -72,7 +72,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_normalize_a16.h b/volk/include/volk/volk_32f_s32f_normalize_a.h index 0850cddf7..f5fd0d1db 100644 --- a/volk/include/volk/volk_32f_s32f_normalize_a16.h +++ b/volk/include/volk/volk_32f_s32f_normalize_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_s32f_normalize_a16_H -#define INCLUDED_volk_32f_s32f_normalize_a16_H +#ifndef INCLUDED_volk_32f_s32f_normalize_a_H +#define INCLUDED_volk_32f_s32f_normalize_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Normalizes all points in the buffer by the scalar value ( divides each data point by the scalar value ) @@ -12,7 +12,7 @@ \param num_points The number of values in vecBuffer \param scalar The scale value to be applied to each buffer value */ -static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; @@ -41,7 +41,7 @@ static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Normalizes the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -49,7 +49,7 @@ static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float \param bVector One of the vectors to be normalizeed \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector */ -static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a_generic(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; const float invScalar = 1.0 / scalar; @@ -60,7 +60,7 @@ static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const f } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Normalizes the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -68,14 +68,14 @@ static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const f \param bVector One of the vectors to be normalizeed \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector */ -extern void volk_32f_s32f_normalize_a16_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points); -static inline void volk_32f_s32f_normalize_a16_orc(float* vecBuffer, const float scalar, unsigned int num_points){ +extern void volk_32f_s32f_normalize_a_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points); +static inline void volk_32f_s32f_normalize_a_orc(float* vecBuffer, const float scalar, unsigned int num_points){ float invscalar = 1.0 / scalar; - volk_32f_s32f_normalize_a16_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); + volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_32f_s32f_normalize_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_normalize_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a16.h b/volk/include/volk/volk_32f_s32f_power_32f_a.h index 3ed594d9a..c4fa31bd1 100644 --- a/volk/include/volk/volk_32f_s32f_power_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_power_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H -#define INCLUDED_volk_32f_s32f_power_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_power_32f_a_H +#define INCLUDED_volk_32f_s32f_power_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <tmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -19,14 +19,14 @@ \param power The power value to be applied to each data point \param 
num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; float* cPtr = cVector; const float* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 zeroValue = _mm_setzero_ps(); __m128 signMask; @@ -62,10 +62,10 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -76,14 +76,14 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; float* cPtr = cVector; const float* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 zeroValue = _mm_setzero_ps(); __m128 signMask; @@ -119,7 +119,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Takes each the input vector value to the specified power and stores the results in the return vector \param cVector The vector where the results will be stored @@ -127,7 +127,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -141,4 +141,4 @@ static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const flo -#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h index 32f4fa067..881067bdc 100644 --- a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H -#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H +#define INCLUDED_volk_32f_s32f_stddev_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean @@ -14,7 +15,7 @@ \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -22,7 +23,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal1, aVal2, aVal3, aVal4; @@ -65,7 +66,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean @@ -74,7 +75,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -82,7 +83,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); @@ -111,7 +112,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean \param stddev The calculated standard deviation @@ -119,7 +120,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ const float* aPtr = inputBuffer; @@ -141,4 +142,4 @@ static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const flo -#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */ diff --git a/volk/include/volk/volk_32f_sqrt_32f_a16.h b/volk/include/volk/volk_32f_sqrt_32f_a.h index 513c2cffe..e44c73cfd 100644 --- a/volk/include/volk/volk_32f_sqrt_32f_a16.h +++ b/volk/include/volk/volk_32f_sqrt_32f_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_32f_sqrt_32f_a16_H -#define INCLUDED_volk_32f_sqrt_32f_a16_H +#ifndef INCLUDED_volk_32f_sqrt_32f_a_H +#define INCLUDED_volk_32f_sqrt_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Sqrts the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -40,14 +40,14 @@ static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVecto } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Sqrts the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a_generic(float* cVector, const float* aVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -58,20 +58,20 @@ static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aV } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC -extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int); +#ifdef LV_HAVE_ORC +extern void volk_32f_sqrt_32f_a_orc_impl(float *, const float*, unsigned int); /*! 
\brief Sqrts the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_32f_a16_orc(float* cVector, const float* aVector, unsigned int num_points){ - volk_32f_sqrt_32f_a16_orc_impl(cVector, aVector, num_points); +static inline void volk_32f_sqrt_32f_a_orc(float* cVector, const float* aVector, unsigned int num_points){ + volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_sqrt_32f_a16_H */ +#endif /* INCLUDED_volk_32f_sqrt_32f_a_H */ diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h index 278089841..3a82e3d2f 100644 --- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h +++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H -#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H +#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H +#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Calculates the standard deviation and mean of the input buffer @@ -14,7 +15,7 @@ \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -22,8 +23,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo const unsigned int sixteenthPoints = num_points / 16; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -78,7 +79,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the standard deviation and mean of the input buffer @@ -87,7 +88,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -95,8 +96,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -134,7 +135,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the standard deviation and mean of the input buffer \param stddev The calculated standard deviation @@ -142,7 +143,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -166,4 +167,4 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, fl -#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */ +#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */ diff --git a/volk/include/volk/volk_32f_x2_add_32f_a16.h b/volk/include/volk/volk_32f_x2_add_32f_a.h index d0d0e0a0e..3bc83653b 100644 --- a/volk/include/volk/volk_32f_x2_add_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_add_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_add_32f_a16_H -#define INCLUDED_volk_32f_x2_add_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_add_32f_a_H +#define INCLUDED_volk_32f_x2_add_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Adds the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Adds the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVec \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Adds the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -extern void volk_32f_x2_add_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_add_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_add_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_add_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_add_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_add_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_divide_32f_a16.h b/volk/include/volk/volk_32f_x2_divide_32f_a.h index d844e25b0..52ddfae87 100644 --- a/volk/include/volk/volk_32f_x2_divide_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_divide_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_divide_32f_a16_H -#define INCLUDED_volk_32f_x2_divide_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_divide_32f_a_H +#define INCLUDED_volk_32f_x2_divide_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Divides the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* a } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Divides the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* a \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const floa } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Divides the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,12 +71,12 @@ static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const floa \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -extern void volk_32f_x2_divide_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_divide_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_divide_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_divide_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_divide_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_divide_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_divide_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_divide_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h index 61aa56815..0c58f2ecf 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h @@ -1,13 +1,14 @@ -#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H -#define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a_H +#define INCLUDED_volk_32f_x2_dot_prod_32f_a_H +#include <volk/volk_common.h> #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_generic(float * result, const float * 
input, const float * taps, unsigned int num_points) { float dotProduct = 0; const float* aPtr = input; @@ -24,10 +25,10 @@ static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const fl #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE -static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const float* input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +54,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -73,11 +74,11 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -102,7 +103,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -120,11 +121,11 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef 
LV_HAVE_SSE4_1 #include <smmintrin.h> -static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -163,7 +164,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; @@ -181,4 +182,4 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a16_H*/ +#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a_H*/ diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h index 8469a3cea..7f47122ff 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h @@ -4,7 +4,7 @@ #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const float * input, const float * taps, unsigned int num_points) { @@ -24,7 +24,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const floa #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* input, const float* taps, unsigned int num_points) { @@ -53,7 +53,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; 
_mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -73,7 +73,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> @@ -102,7 +102,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -120,7 +120,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> @@ -163,7 +163,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; diff --git a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h index 29c9392df..1d4d2dbbd 100644 --- a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h +++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a16_H -#define INCLUDED_volk_32f_x2_interleave_32fc_a16_H +#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H +#define INCLUDED_volk_32f_x2_interleave_32fc_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Interleaves the I & Q vector data into the complex vector @@ -13,7 +13,7 @@ \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ unsigned int number = 0; float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; @@ -48,7 +48,7 @@ static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Interleaves the I & Q vector data into the complex vector. \param iBuffer The I buffer data to be interleaved @@ -56,7 +56,7 @@ static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVec -#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a16_H */ +#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */ diff --git a/volk/include/volk/volk_32f_x2_max_32f_a16.h b/volk/include/volk/volk_32f_x2_max_32f_a.h index 26e7f1246..7948c458d 100644 --- a/volk/include/volk/volk_32f_x2_max_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_max_32f_a.h 
@@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H -#define INCLUDED_volk_32f_x2_max_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_max_32f_a_H +#define INCLUDED_volk_32f_x2_max_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -45,7 +45,7 @@ static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVec \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,7 +67,7 @@ static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -75,11 +75,11 @@ static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_max_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_max_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_max_32f_a_orc_impl(cVector, 
aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_max_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_min_32f_a16.h b/volk/include/volk/volk_32f_x2_min_32f_a.h index 23bae044c..d77134868 100644 --- a/volk/include/volk/volk_32f_x2_min_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_min_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_min_32f_a16_H -#define INCLUDED_volk_32f_x2_min_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_min_32f_a_H +#define INCLUDED_volk_32f_x2_min_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -45,7 +45,7 @@ static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVec \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,7 +67,7 @@ static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -75,11 +75,11 @@ static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -extern void volk_32f_x2_min_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_min_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_min_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_min_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_min_32f_a_orc_impl(cVector, 
aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_min_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_min_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a.h index a0dcfa86e..fae9a652f 100644 --- a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_multiply_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_multiply_32f_a16_H -#define INCLUDED_volk_32f_x2_multiply_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H +#define INCLUDED_volk_32f_x2_multiply_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplys the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,46 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_AVX +#include <immintrin.h> +/*! 
+ \brief Multiplies the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m256 aVal, bVal, cVal; + for(;number < eighthPoints; number++){ + + aVal = _mm256_load_ps(aPtr); + bVal = _mm256_load_ps(bPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 8; + bPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_GENERIC /*! 
\brief Multiplys the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +90,7 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +102,7 @@ static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const fl } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Multiplys the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +110,11 @@ static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const fl \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -extern void volk_32f_x2_multiply_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_multiply_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_multiply_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_multiply_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + 
volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_multiply_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h index 30306774d..cc02c3678 100644 --- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h +++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H -#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H +#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H +#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. @@ -14,7 +15,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -62,7 +63,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexV } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 
@@ -72,7 +73,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexV \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -85,7 +86,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ iValue = _mm_load_ps(iBufferPtr); @@ -127,7 +128,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 
\param iBuffer The I buffer data to be interleaved @@ -136,7 +137,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ int16_t* complexVectorPtr = (int16_t*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -152,4 +153,4 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* compl -#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */ +#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */ diff --git a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a.h index 7404bfe79..16cad008a 100644 --- a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_subtract_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_subtract_32f_a16_H -#define INCLUDED_volk_32f_x2_subtract_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H +#define INCLUDED_volk_32f_x2_subtract_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Subtracts bVector form aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Subtracts bVector form aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const fl } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Subtracts bVector form aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const fl \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -extern void volk_32f_x2_subtract_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_subtract_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_subtract_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_subtract_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_subtract_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_subtract_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h index af9e39537..2ea8fa96d 100644 --- a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h +++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H -#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H +#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H +#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H #include<inttypes.h> #include<stdio.h> @@ -9,11 +9,11 @@ #define MAX(X,Y) ((X) > (Y)?(X):(Y)) #endif -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* src0, float* 
center_point_array, float* cutoff, unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { float result = 0.0; @@ -98,9 +98,9 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* sr #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { @@ -148,4 +148,4 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H*/ +#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H*/ diff --git a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h index 514998800..b7350b9fa 100644 --- a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h +++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a16_H -#define INCLUDED_volk_32fc_32f_multiply_32fc_a16_H +#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H +#define INCLUDED_volk_32fc_32f_multiply_32fc_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies the input complex vector with the input float vector and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector The vectors containing the float values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -56,7 +56,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector \param cVector The vector where the results will be stored @@ -64,7 +64,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const float* bPtr= bVector; @@ -76,7 +76,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector \param cVector The vector where the results will be stored @@ -84,12 +84,12 @@ static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, c \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -extern void volk_32fc_32f_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32fc_32f_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ - volk_32fc_32f_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32fc_32f_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ + volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a16_H */ +#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h index 84d2576ed..9de036ef4 100644 --- a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H -#define INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H +#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include 
<xmmintrin.h> /*! \brief Deinterleaves the complex vector into I & Q vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I & Q vector data \param complexVector The complex input vector @@ -57,7 +57,7 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, flo -#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h index 34262a7af..29c369d9a 100644 --- a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h +++ 
b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H -#define INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H +#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, doubl } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the lv_32fc_t vector into double I & Q vector data \param complexVector The complex input vector @@ -59,7 +59,7 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, doubl \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -75,4 +75,4 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, do -#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h index 9838ec88b..a1d0fd5d1 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a16_H -#define INCLUDED_volk_32fc_deinterleave_real_32f_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H +#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Deinterleaves the complex vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -44,14 +44,14 @@ static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; @@ -65,4 +65,4 @@ static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, c -#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h index af392d074..70a3b1971 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a16_H -#define INCLUDED_volk_32fc_deinterleave_real_64f_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H 
+#define INCLUDED_volk_32fc_deinterleave_real_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the complex vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -42,14 +42,14 @@ static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, con } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, -#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */ diff --git a/volk/include/volk/volk_32fc_index_max_16u_a16.h b/volk/include/volk/volk_32fc_index_max_16u_a.h index 532ae4e7c..312e034e2 100644 --- a/volk/include/volk/volk_32fc_index_max_16u_a16.h +++ b/volk/include/volk/volk_32fc_index_max_16u_a.h @@ -1,17 +1,17 @@ -#ifndef 
INCLUDED_volk_32fc_index_max_16u_a16_H -#define INCLUDED_volk_32fc_index_max_16u_a16_H +#ifndef INCLUDED_volk_32fc_index_max_16u_a_H +#define INCLUDED_volk_32fc_index_max_16u_a_H #include <volk/volk_common.h> #include<inttypes.h> #include<stdio.h> #include<volk/volk_complex.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { @@ -188,8 +188,8 @@ static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32f #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32fc_index_max_16u_a_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { float sq_dist = 0.0; float max = 0.0; unsigned int index = 0; @@ -212,4 +212,4 @@ static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_ #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32fc_index_max_16u_a16_H*/ +#endif /*INCLUDED_volk_32fc_index_max_16u_a_H*/ diff --git a/volk/include/volk/volk_32fc_magnitude_32f_a16.h b/volk/include/volk/volk_32fc_magnitude_32f_a.h index be7216dce..f18e9bc0b 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_a16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_32fc_magnitude_32f_a16_H -#define INCLUDED_volk_32fc_magnitude_32f_a16_H +#ifndef INCLUDED_volk_32fc_magnitude_32f_a_H +#define INCLUDED_volk_32fc_magnitude_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -13,7 +13,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -48,7 +48,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, cons } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -56,7 +56,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, cons \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -96,14 +96,14 @@ static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -115,18 +115,18 @@ static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_32fc_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); -static inline void volk_32fc_magnitude_32f_a16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ - volk_32fc_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, num_points); +extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); +static inline void volk_32fc_magnitude_32f_a_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* 
INCLUDED_volk_32fc_magnitude_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h index e9f74438d..9304b0c28 100644 --- a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a16_H -#define INCLUDED_volk_32fc_s32f_atan2_32f_a16_H +#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H +#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -19,7 +19,7 @@ \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. */ -static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -27,7 +27,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con const unsigned int quarterPoints = num_points / 4; const float invNormalizeFactor = 1.0 / normalizeFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 testVector = _mm_set_ps1(2*M_PI); __m128 correctVector = _mm_set_ps1(M_PI); __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor); @@ -67,10 +67,10 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH 
+#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -81,7 +81,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. */ -static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -89,7 +89,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const const unsigned int quarterPoints = num_points / 4; const float invNormalizeFactor = 1.0 / normalizeFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 testVector = _mm_set_ps1(2*M_PI); __m128 correctVector = _mm_set_ps1(M_PI); __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor); @@ -131,7 +131,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief performs the atan2 on the input vector and stores the results in the output vector. \param outputVector The vector where the results will be stored. @@ -139,7 +139,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. 
*/ -static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ float* outPtr = outputVector; const float* inPtr = (float*)inputVector; const float invNormalizeFactor = 1.0 / normalizeFactor; @@ -155,4 +155,4 @@ static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, con -#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h index 31465bff9..1c17fb70c 100644 --- a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H -#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H +#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H +#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Deinterleaves the complex vector, multiply the value by the scalar, convert to 16t, and in I vector data @@ -13,7 +14,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -24,7 +25,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer __m128 cplxValue1, cplxValue2, iValue; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -54,7 +55,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex vector, multiply the value by the scalar, convert to 16t, and in I vector data \param complexVector The complex input vector @@ -62,7 +63,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* iBufferPtr = iBuffer; unsigned int number = 0; @@ -77,4 +78,4 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBu -#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h index 530359600..38fd609d3 100644 --- a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H -#define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H +#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H +#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector @@ -14,7 +15,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,7 +26,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto __m128 cplxValue1, cplxValue2, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -60,7 +61,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector @@ -69,7 +70,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -80,7 +81,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -120,7 +121,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -128,7 +129,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -140,7 +141,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVe } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -148,11 +149,11 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVe \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_32fc_s32f_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); -static inline void volk_32fc_s32f_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ - volk_32fc_s32f_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +extern void volk_32fc_s32f_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_32fc_s32f_magnitude_16i_a_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ + volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, scalar, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h index 3507fdb3c..ec1d7167f 100644 --- a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h +++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h @@ -1,13 +1,21 @@ -#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a16_H -#define INCLUDED_volk_32fc_s32f_power_32fc_a16_H +#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a_H +#define INCLUDED_volk_32fc_s32f_power_32fc_a_H #include <inttypes.h> #include <stdio.h> +#include <math.h> -#if LV_HAVE_SSE +//! 
raise a complex float to a real float power +static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, const float power){ + const float arg = power*atan2f(lv_cimag(exp), lv_creal(exp)); /* atan2f takes (y, x): phase = atan2(imag, real) */ + const float mag = powf(lv_creal(exp)*lv_creal(exp) + lv_cimag(exp)*lv_cimag(exp), power/2); + return mag*lv_cmake(cosf(arg), sinf(arg)); +} + +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -18,14 +26,14 @@ \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue; @@ -72,16 +80,13 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l number = quarterPoints * 4; #endif /* LV_HAVE_LIB_SIMDMATH */ - lv_32fc_t complexPower; - ((float*)&complexPower)[0] = power; - ((float*)&complexPower)[1] = 0; for(;number < num_points; number++){ - *cPtr++ = lv_cpow((*aPtr++), complexPower); + *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power); } } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Takes each the input complex vector value to the specified power and stores the results in the return vector \param cVector The vector where the results will be stored @@ -89,16 +94,13 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; unsigned int number = 0; - lv_32fc_t complexPower; - ((float*)&complexPower)[0] = power; - ((float*)&complexPower)[1] = 0.0; for(number = 0; number < num_points; number++){ - *cPtr++ = lv_cpow((*aPtr++), complexPower); + *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power); } } #endif /* LV_HAVE_GENERIC */ @@ -106,4 +108,4 @@ static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, con -#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h index 39d8f7aa2..8d1959dae 100644 --- a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H -#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H +#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H +#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -#if 
LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -19,12 +19,12 @@ \param normalizationFactor This value is divided against all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; const float iNormalizationFactor = 1.0 / normalizationFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 magScalar = _mm_set_ps1(10.0); magScalar = _mm_div_ps(magScalar, logf4(magScalar)); @@ -88,7 +88,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the log10 power value for each input point \param logPowerOutput The 10.0 * log10(r*r + i*i) for each data point @@ -96,7 +96,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut \param normalizationFactor This value is divided agains all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_power_spectrum_32f_a_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -123,4 +123,4 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPower -#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h index 0120b5307..fc635f171 100644 --- a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H -#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H +#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H +#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -20,14 +20,14 @@ \param 
rbw The resolution bandwith of the fft spectrum \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; const float iRBW = 1.0 / rbw; const float iNormalizationFactor = 1.0 / normalizationFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 magScalar = _mm_set_ps1(10.0); magScalar = _mm_div_ps(magScalar, logf4(magScalar)); @@ -94,7 +94,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the log10 power value divided by the RBW for each input point \param logPowerOutput The 10.0 * log10((r*r + i*i)/RBW) for each data point @@ -103,7 +103,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* \param rbw The resolution bandwith of the fft spectrum \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -131,4 +131,4 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(floa -#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h index a01971df3..a6c21336d 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h @@ -1,14 +1,15 @@ -#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H -#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H +#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H +#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H +#include <volk/volk_common.h> #include<volk/volk_complex.h> #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int 
num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -62,9 +63,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* r #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -203,9 +204,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* resul #endif #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; int bound = num_bytes >> 4; int leftovers = num_bytes % 16; @@ -341,4 +342,4 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* re -#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H*/ diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h 
b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h index 2fa5918cc..6b22d9f81 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -5,7 +5,7 @@ #include<volk/volk_complex.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { @@ -57,7 +57,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <xmmintrin.h> #include <pmmintrin.h> @@ -66,7 +66,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; union HalfMask { uint32_t intRep[4]; @@ -131,7 +131,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result Isum += Im; } - result[0] = lv_32fc_init(Rsum,Isum); + result[0] = lv_cmake(Rsum,Isum); return; } diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h index 9a7b65ab4..022a0a614 100644 --- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h @@ -1,15 +1,16 @@ -#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H -#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H +#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H +#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H +#include <volk/volk_common.h> #include <volk/volk_complex.h> #include <stdio.h> #include <string.h> -#if 
LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -59,7 +60,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, con #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm @@ -194,7 +195,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, cons #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm volatile ( @@ -316,11 +317,11 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, cons #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { lv_32fc_t dotProduct; @@ -358,7 +359,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const b += 2; } - lv_32fc_t dotProductVector[2] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) lv_32fc_t 
dotProductVector[2]; _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -373,12 +374,12 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - volk_32fc_x2_dot_prod_32fc_a16_sse3(result, input, taps, num_bytes); +static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { + volk_32fc_x2_dot_prod_32fc_a_sse3(result, input, taps, num_bytes); // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now /* __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1; @@ -465,4 +466,4 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, cons #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H*/ diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h index b4214f5d2..18dd092e8 100644 --- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h @@ -1,12 +1,12 @@ -#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a16_H -#define INCLUDED_volk_32fc_x2_multiply_32fc_a16_H +#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H +#define INCLUDED_volk_32fc_x2_multiply_32fc_a_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> #include <float.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Multiplies the two input complex vectors and stores their results in the third vector @@ -15,7 +15,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -53,7 +53,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplies the two input complex vectors and stores their results in the third vector \param cVector The vector where the results will be stored @@ -61,7 +61,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const lv_32fc_t* bPtr= bVector; @@ -73,7 +73,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, co } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Multiplies the two input complex vectors and stores their results in the third vector \param cVector The vector where the results will be stored @@ -81,9 +81,9 @@ static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, co \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -extern void volk_32fc_x2_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); -static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ - volk_32fc_x2_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); +static inline void volk_32fc_x2_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ @@ -91,4 +91,4 @@ static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const -#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a16_H */ +#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h index 6a863b16d..be7a4ffe9 100644 --- a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h +++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h @@ -1,16 +1,16 @@ -#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H -#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H +#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H +#define 
INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H #include<inttypes.h> #include<stdio.h> #include<volk/volk_complex.h> #include <string.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; @@ -105,8 +105,8 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -123,4 +123,4 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(flo #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H*/ diff --git a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h index 406097fc8..c21d00491 100644 --- a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h +++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h @@ -1,15 +1,15 @@ -#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a16_H -#define INCLUDED_volk_32fc_x2_square_dist_32f_a16_H +#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a_H +#define INCLUDED_volk_32fc_x2_square_dist_32f_a_H #include<inttypes.h> #include<stdio.h> #include<volk/volk_complex.h> 
-#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { +static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -91,8 +91,8 @@ static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_ #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32fc_x2_square_dist_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -109,4 +109,4 @@ static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32 #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a_H*/ diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a.h index 0fcadd9cb..558142869 100644 --- a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32i_s32f_convert_32f_a16_H -#define INCLUDED_volk_32i_s32f_convert_32f_a16_H +#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H +#define INCLUDED_volk_32i_s32f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
@@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -47,7 +47,7 @@ static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 32 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 32 bit input data buffer @@ -55,7 +55,7 @@ static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int32_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -70,4 +70,4 @@ static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, co -#endif /* INCLUDED_volk_32i_s32f_convert_32f_a16_H */ +#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */ diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_u.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h index 1dd6422f8..d8afd218c 100644 --- a/volk/include/volk/volk_32i_s32f_convert_32f_u.h +++ 
b/volk/include/volk/volk_32i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -48,7 +48,7 @@ static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const i #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 32 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 32 bit input data buffer diff --git a/volk/include/volk/volk_32i_x2_and_32i_a16.h b/volk/include/volk/volk_32i_x2_and_32i_a.h index 3baa1d856..dcd63d98e 100644 --- a/volk/include/volk/volk_32i_x2_and_32i_a16.h +++ b/volk/include/volk/volk_32i_x2_and_32i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32i_x2_and_32i_a16_H -#define INCLUDED_volk_32i_x2_and_32i_a16_H +#ifndef INCLUDED_volk_32i_x2_and_32i_a_H +#define INCLUDED_volk_32i_x2_and_32i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Ands the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Ands the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32 } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Ands the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32 \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -extern void volk_32i_x2_and_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); -static inline void volk_32i_x2_and_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ - volk_32i_x2_and_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_and_32i_a_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, 
num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32i_x2_and_32i_a16_H */ +#endif /* INCLUDED_volk_32i_x2_and_32i_a_H */ diff --git a/volk/include/volk/volk_32i_x2_or_32i_a16.h b/volk/include/volk/volk_32i_x2_or_32i_a.h index 0be22f00a..243e8178c 100644 --- a/volk/include/volk/volk_32i_x2_or_32i_a16.h +++ b/volk/include/volk/volk_32i_x2_or_32i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32i_x2_or_32i_a16_H -#define INCLUDED_volk_32i_x2_or_32i_a16_H +#ifndef INCLUDED_volk_32i_x2_or_32i_a_H +#define INCLUDED_volk_32i_x2_or_32i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Ors the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* a } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Ors the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* a \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_ } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Ors the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_ \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -extern void volk_32i_x2_or_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); -static inline void volk_32i_x2_or_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ - volk_32i_x2_or_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_or_32i_a_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, 
bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32i_x2_or_32i_a16_H */ +#endif /* INCLUDED_volk_32i_x2_or_32i_a_H */ diff --git a/volk/include/volk/volk_32u_byteswap_a16.h b/volk/include/volk/volk_32u_byteswap_a.h index 7556ec7b1..b88848096 100644 --- a/volk/include/volk/volk_32u_byteswap_a16.h +++ b/volk/include/volk/volk_32u_byteswap_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32u_byteswap_a16_H -#define INCLUDED_volk_32u_byteswap_a16_H +#ifndef INCLUDED_volk_32u_byteswap_a_H +#define INCLUDED_volk_32u_byteswap_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint32_t* inputPtr = intsToSwap; @@ -51,13 +51,13 @@ static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int32_t's. 
\param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = intsToSwap; unsigned int point; @@ -74,4 +74,4 @@ static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned -#endif /* INCLUDED_volk_32u_byteswap_a16_H */ +#endif /* INCLUDED_volk_32u_byteswap_a_H */ diff --git a/volk/include/volk/volk_32u_popcnt_a16.h b/volk/include/volk/volk_32u_popcnt_a.h index f6e25e4e8..b72d605c6 100644 --- a/volk/include/volk/volk_32u_popcnt_a16.h +++ b/volk/include/volk/volk_32u_popcnt_a.h @@ -5,9 +5,9 @@ #include <inttypes.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a_generic(uint32_t* ret, const uint32_t value) { // This is faster than a lookup table uint32_t retVal = value; @@ -23,11 +23,11 @@ static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t val #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE4_2 +#ifdef LV_HAVE_SSE4_2 #include <nmmintrin.h> -static inline void volk_32u_popcnt_a16_sse4_2(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t value) { *ret = _mm_popcnt_u32(value); } diff --git a/volk/include/volk/volk_64f_convert_32f_a16.h b/volk/include/volk/volk_64f_convert_32f_a.h index 7dca065f0..2126e4f95 100644 --- a/volk/include/volk/volk_64f_convert_32f_a16.h +++ b/volk/include/volk/volk_64f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64f_convert_32f_a16_H -#define INCLUDED_volk_64f_convert_32f_a16_H +#ifndef INCLUDED_volk_64f_convert_32f_a_H +#define INCLUDED_volk_64f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include 
<emmintrin.h> /*! \brief Converts the double values into float values @@ -12,7 +12,7 @@ \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const doub \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a_generic(float* outputVector, const double* inputVector, unsigned int num_points){ float* outputVectorPtr = outputVector; const double* inputVectorPtr = inputVector; unsigned int number = 0; @@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const d -#endif /* INCLUDED_volk_64f_convert_32f_a16_H */ +#endif /* INCLUDED_volk_64f_convert_32f_a_H */ diff --git a/volk/include/volk/volk_64f_convert_32f_u.h b/volk/include/volk/volk_64f_convert_32f_u.h index 6338c1433..5c323230a 100644 --- a/volk/include/volk/volk_64f_convert_32f_u.h +++ b/volk/include/volk/volk_64f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the double values into float values diff --git a/volk/include/volk/volk_64f_x2_max_64f_a16.h b/volk/include/volk/volk_64f_x2_max_64f_a.h index 4b0c1f5f1..61a704c52 100644 --- a/volk/include/volk/volk_64f_x2_max_64f_a16.h +++ b/volk/include/volk/volk_64f_x2_max_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64f_x2_max_64f_a16_H -#define INCLUDED_volk_64f_x2_max_64f_a16_H +#ifndef INCLUDED_volk_64f_x2_max_64f_a_H +#define INCLUDED_volk_64f_x2_max_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -45,7 +45,7 @@ static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* a } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* a \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_64f_x2_max_64f_a16_H */ +#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */ diff --git a/volk/include/volk/volk_64f_x2_min_64f_a16.h b/volk/include/volk/volk_64f_x2_min_64f_a.h index aa961e384..148b72c59 100644 --- a/volk/include/volk/volk_64f_x2_min_64f_a16.h +++ b/volk/include/volk/volk_64f_x2_min_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64f_x2_min_64f_a16_H -#define INCLUDED_volk_64f_x2_min_64f_a16_H +#ifndef INCLUDED_volk_64f_x2_min_64f_a_H +#define INCLUDED_volk_64f_x2_min_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -45,7 +45,7 @@ static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* a } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* a \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_64f_x2_min_64f_a16_H */ +#endif /* INCLUDED_volk_64f_x2_min_64f_a_H */ diff --git a/volk/include/volk/volk_64u_byteswap_a16.h b/volk/include/volk/volk_64u_byteswap_a.h index 0eefe0138..d4fc74a6e 100644 --- a/volk/include/volk/volk_64u_byteswap_a16.h +++ 
b/volk/include/volk/volk_64u_byteswap_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64u_byteswap_a16_H -#define INCLUDED_volk_64u_byteswap_a16_H +#ifndef INCLUDED_volk_64u_byteswap_a_H +#define INCLUDED_volk_64u_byteswap_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int num_points){ +static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; __m128i input, byte1, byte2, byte3, byte4, output; __m128i byte2mask = _mm_set1_epi32(0x00FF0000); @@ -59,13 +59,13 @@ static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int64_t's. 
\param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned int num_points){ +static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; unsigned int point; for(point = 0; point < num_points; point++){ @@ -85,4 +85,4 @@ static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned -#endif /* INCLUDED_volk_64u_byteswap_a16_H */ +#endif /* INCLUDED_volk_64u_byteswap_a_H */ diff --git a/volk/include/volk/volk_64u_popcnt_a16.h b/volk/include/volk/volk_64u_popcnt_a.h index 59511dc29..bdaa98643 100644 --- a/volk/include/volk/volk_64u_popcnt_a16.h +++ b/volk/include/volk/volk_64u_popcnt_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_64u_popcnt_a16_H -#define INCLUDED_volk_64u_popcnt_a16_H +#ifndef INCLUDED_volk_64u_popcnt_a_H +#define INCLUDED_volk_64u_popcnt_a_H #include <stdio.h> #include <inttypes.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) { const uint32_t* valueVector = (const uint32_t*)&value; @@ -40,11 +40,11 @@ static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t val #include <nmmintrin.h> -static inline void volk_64u_popcnt_a16_sse4_2(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value) { *ret = _mm_popcnt_u64(value); } #endif /*LV_HAVE_SSE4_2*/ -#endif /*INCLUDED_volk_64u_popcnt_a16_H*/ +#endif /*INCLUDED_volk_64u_popcnt_a_H*/ diff --git a/volk/include/volk/volk_8i_convert_16i_a16.h b/volk/include/volk/volk_8i_convert_16i_a.h index 3d7045753..9104f90cb 100644 --- a/volk/include/volk/volk_8i_convert_16i_a16.h +++ b/volk/include/volk/volk_8i_convert_16i_a.h @@ -1,10 +1,10 @@ -#ifndef 
INCLUDED_volk_8i_convert_16i_a16_H -#define INCLUDED_volk_8i_convert_16i_a16_H +#ifndef INCLUDED_volk_8i_convert_16i_a_H +#define INCLUDED_volk_8i_convert_16i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -13,7 +13,7 @@ \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -47,14 +47,14 @@ static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const i } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -65,16 +65,16 @@ static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -extern void volk_8i_convert_16i_a16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); -static inline void volk_8i_convert_16i_a16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ - volk_8i_convert_16i_a16_orc_impl(outputVector, inputVector, num_points); +extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); +static inline void volk_8i_convert_16i_a_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ + volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points); } #endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8i_convert_16i_u.h b/volk/include/volk/volk_8i_convert_16i_u.h index bcff13406..7d7104f52 100644 --- a/volk/include/volk/volk_8i_convert_16i_u.h +++ b/volk/include/volk/volk_8i_convert_16i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -48,7 +48,7 @@ static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a.h index 99a24ec10..7f2623ac6 100644 --- a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_8i_s32f_convert_32f_a16_H -#define INCLUDED_volk_8i_s32f_convert_32f_a16_H +#ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H +#define INCLUDED_volk_8i_s32f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -66,7 +66,7 @@ static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, cons } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer @@ -74,7 +74,7 @@ static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, cons \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -86,7 +86,7 @@ static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, con } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer @@ -94,10 +94,10 @@ static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, con \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -extern void volk_8i_s32f_convert_32f_a16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); -static inline void volk_8i_s32f_convert_32f_a16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); +static inline void volk_8i_s32f_convert_32f_a_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float invscalar = 1.0 / scalar; - volk_8i_s32f_convert_32f_a16_orc_impl(outputVector, inputVector, invscalar, num_points); + volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points); } #endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_u.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h index 1e30957e8..3cd6bb67c 100644 --- a/volk/include/volk/volk_8i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -67,7 +67,7 @@ static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h index 91c9b2c58..8f13da32f 100644 --- a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H -#define INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H +#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H +#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -51,7 +51,7 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data \param complexVector The complex input vector @@ -59,7 +59,7 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -74,4 +74,4 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, in -#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */ diff --git a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a.h index bf3dc20dd..d26b3d0d0 100644 --- a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a16_H -#define INCLUDED_volk_8ic_deinterleave_real_16i_a16_H +#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H +#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Deinterleaves the complex 8 bit vector into I 16 bit vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -42,14 +42,14 @@ static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, c #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, -#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */ diff --git a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a.h index 13de79423..21efed83e 100644 --- a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include 
<tmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -43,14 +43,14 @@ static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, cons } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h index 22c3ebb23..b723c6f8b 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H -#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H +#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if 
LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data @@ -14,7 +15,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -74,7 +75,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data @@ -84,7 +85,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -95,7 +96,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo __m128 invScalar = _mm_set_ps1(1.0/scalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ floatBuffer[0] = 
(float)(complexVectorPtr[0]); @@ -136,7 +137,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data \param complexVector The complex input vector @@ -145,7 +146,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -161,4 +162,4 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, -#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H */ diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h index 5f1430394..74073f5a6 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H -#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H +#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Deinterleaves the complex 8 bit vector into I float vector data @@ -13,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -61,7 +62,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I float vector data @@ -70,7 +71,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -81,7 +82,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c __m128 invScalar = _mm_set_ps1(iScalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; @@ -107,7 +108,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c } #endif /* 
LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I float vector data \param complexVector The complex input vector @@ -115,7 +116,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; @@ -130,4 +131,4 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffe -#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */ diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h index d9cacbf46..0bb76f1d1 100644 --- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h +++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H -#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H +#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H +#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector @@ -14,7 +14,7 @@ \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -23,7 +23,6 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe const lv_8sc_t* a = aVector; const lv_8sc_t* b = bVector; __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); - const int shuffleMask = _MM_SHUFFLE(2,3,0,1); for(;number < quarterPoints; number++){ // Convert into 8 bit values into 16 bit values @@ -37,7 +36,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe y = _mm_sign_epi16(y, conjugateSign); // Shift the order of the cr and ci values - y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); + y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); // Calculate the ar*(-ci) + cr*(ai) imagz = _mm_madd_epi16(x,y); @@ -56,10 +55,10 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe for(; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, 
-bImag ); lv_32fc_t temp = aVal * bVal; *c16Ptr++ = (int16_t)lv_creal(temp); @@ -68,7 +67,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector \param cVector The complex vector where the results will be stored @@ -76,7 +75,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; int16_t* c16Ptr = (int16_t*)cVector; int8_t* a8Ptr = (int8_t*)aVector; @@ -84,10 +83,10 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cV for(number =0; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *c16Ptr++ = (int16_t)lv_creal(temp); @@ -99,4 +98,4 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cV -#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */ +#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */ diff --git 
a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h index 6ec923a4f..3e05608a4 100644 --- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h +++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H -#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H +#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H +#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector @@ -14,7 +14,7 @@ \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -24,7 +24,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t const lv_8sc_t* a = aVector; const lv_8sc_t* b = bVector; __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); - const int shuffleMask = _MM_SHUFFLE(2,3,0,1); + __m128 invScalar = _mm_set_ps1(1.0/scalar); for(;number < quarterPoints; number++){ @@ -39,7 +39,7 @@ static inline void 
volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t y = _mm_sign_epi16(y, conjugateSign); // Shift the order of the cr and ci values - y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); + y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); // Calculate the ar*(-ci) + cr*(ai) imagz = _mm_madd_epi16(x,y); @@ -75,10 +75,10 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t for(; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *cFloatPtr++ = lv_creal(temp) / scalar; @@ -87,7 +87,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector \param cVector The complex vector where the results will be stored @@ -95,7 +95,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; float* cPtr = (float*)cVector; const float invScalar = 1.0 / scalar; @@ -104,10 +104,10 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_ for(number = 0; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *cPtr++ = (lv_creal(temp) * invScalar); @@ -119,4 +119,4 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_ -#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H */ +#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */ diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h index 6f444ad89..2c935d1fb 100644 --- a/volk/include/volk/volk_common.h +++ b/volk/include/volk/volk_common.h @@ -1,18 +1,94 @@ -#ifndef 
INCLUDED_LIBVECTOR_COMMON_H -#define INCLUDED_LIBVECTOR_COMMON_H +#ifndef INCLUDED_LIBVOLK_COMMON_H +#define INCLUDED_LIBVOLK_COMMON_H + +//////////////////////////////////////////////////////////////////////// +// Cross-platform attribute macros +//////////////////////////////////////////////////////////////////////// +#if defined __GNUC__ +# define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x))) +# define __VOLK_ATTR_UNUSED __attribute__((unused)) +# define __VOLK_ATTR_INLINE __attribute__((always_inline)) +# define __VOLK_ATTR_DEPRECATED __attribute__((deprecated)) +# if __GNUC__ >= 4 +# define __VOLK_ATTR_EXPORT __attribute__((visibility("default"))) +# define __VOLK_ATTR_IMPORT __attribute__((visibility("default"))) +# else +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +# endif +#elif _MSC_VER +# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x)) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE __forceinline +# define __VOLK_ATTR_DEPRECATED __declspec(deprecated) +# define __VOLK_ATTR_EXPORT __declspec(dllexport) +# define __VOLK_ATTR_IMPORT __declspec(dllimport) +#else +# define __VOLK_ATTR_ALIGNED(x) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE +# define __VOLK_ATTR_DEPRECATED +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// Ignore annoying warnings in MSVC +//////////////////////////////////////////////////////////////////////// +#if defined(_MSC_VER) +# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data +# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2' +#endif + +//////////////////////////////////////////////////////////////////////// +// C-linkage declaration macros +// FIXME: due to the usage of complex.h, require gcc for c-linkage +//////////////////////////////////////////////////////////////////////// +#if defined(__cplusplus) && 
(__GNUC__) +# define __VOLK_DECL_BEGIN extern "C" { +# define __VOLK_DECL_END } +#else +# define __VOLK_DECL_BEGIN +# define __VOLK_DECL_END +#endif + +//////////////////////////////////////////////////////////////////////// +// Define VOLK_API for library symbols +// http://gcc.gnu.org/wiki/Visibility +//////////////////////////////////////////////////////////////////////// +#ifdef volk_EXPORTS +# define VOLK_API __VOLK_ATTR_EXPORT +#else +# define VOLK_API __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// The bit128 union used by some +//////////////////////////////////////////////////////////////////////// +#include <inttypes.h> + +#ifdef LV_HAVE_SSE +#include <xmmintrin.h> +#endif + +#ifdef LV_HAVE_SSE2 +#include <emmintrin.h> +#endif -#include<inttypes.h> -#if LV_HAVE_MMX -#include<xmmintrin.h> union bit128{ uint16_t i16[8]; uint32_t i[4]; float f[4]; double d[2]; - __m128i int_vec; + + #ifdef LV_HAVE_SSE __m128 float_vec; + #endif + + #ifdef LV_HAVE_SSE2 + __m128i int_vec; __m128d double_vec; + #endif }; -#endif /*LV_HAVE_MMX*/ -#endif /*INCLUDED_LIBVECTOR_COMMON_H*/ +#endif /*INCLUDED_LIBVOLK_COMMON_H*/ diff --git a/volk/include/volk/volk_complex.h b/volk/include/volk/volk_complex.h index f2fd17342..5bd925044 100644 --- a/volk/include/volk/volk_complex.h +++ b/volk/include/volk/volk_complex.h @@ -2,8 +2,21 @@ #define INCLUDE_VOLK_COMPLEX_H /*! - \brief This header file is to prevent issues with having "complex" and "complex.h" variables in the same code as the gcc compiler does not allow that -*/ + * \brief Provide typedefs and operators for all complex types in C and C++. + * + * The typedefs encompass all signed integer and floating point types. + * Each operator function is intended to work across all data types. + * Under C++, these operators are defined as inline templates. + * Under C, these operators are defined as preprocessor macros. 
+ * The use of macros makes the operators agnostic to the type. + * + * The following operator functions are defined: + * - lv_cmake - make a complex type from components + * - lv_creal - get the real part of the complex number + * - lv_cimag - get the imaginary part of the complex number + * - lv_conj - take the conjugate of the complex number + */ + #ifdef __cplusplus #include <complex> @@ -12,60 +25,62 @@ typedef std::complex<int8_t> lv_8sc_t; typedef std::complex<int16_t> lv_16sc_t; typedef std::complex<int32_t> lv_32sc_t; +typedef std::complex<int64_t> lv_64sc_t; typedef std::complex<float> lv_32fc_t; typedef std::complex<double> lv_64fc_t; -static inline float lv_creal(const lv_32fc_t x){ - return x.real(); -} - -static inline float lv_cimag(const lv_32fc_t x){ - return x.imag(); +template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){ + return std::complex<T>(r, i); } -static inline lv_32fc_t lv_conj(const lv_32fc_t x){ - return std::conj(x); +template <typename T> inline typename T::value_type lv_creal(const T &x){ + return x.real(); } -static inline lv_32fc_t lv_cpow(const lv_32fc_t x, const lv_32fc_t y){ - return std::pow(x, y); +template <typename T> inline typename T::value_type lv_cimag(const T &x){ + return x.imag(); } -static inline lv_32fc_t lv_32fc_init(const float x, const float y){ - return std::complex<float>(x,y); +template <typename T> inline T lv_conj(const T &x){ + return std::conj(x); } -#else +#else /* __cplusplus */ #include <complex.h> typedef char complex lv_8sc_t; typedef short complex lv_16sc_t; -typedef int complex lv_32sc_t; +typedef long complex lv_32sc_t; +typedef long long complex lv_64sc_t; typedef float complex lv_32fc_t; typedef double complex lv_64fc_t; -static inline float lv_creal(const lv_32fc_t x){ - return creal(x); -} +#define lv_cmake(r, i) ((r) + _Complex_I*(i)) -static inline float lv_cimag(const lv_32fc_t x){ - return cimag(x); -} +// When GNUC is available, use the complex extensions. 
+// The extensions always return the correct value type. +// http://gcc.gnu.org/onlinedocs/gcc/Complex.html +#ifdef __GNUC__ -static inline lv_32fc_t lv_conj(const lv_32fc_t x){ - return conj(x); -} +#define lv_creal(x) (__real__(x)) -static inline lv_32fc_t lv_cpow(const lv_32fc_t x, const lv_32fc_t y){ - return cpow(x, y); -} +#define lv_cimag(x) (__imag__(x)) -static inline lv_32fc_t lv_32fc_init(const float x, const float y){ - return x + I*y; -} +#define lv_conj(x) (~(x)) + +// When not available, use the c99 complex function family, +// which always returns double regardless of the input type. +#else /* __GNUC__ */ + +#define lv_creal(x) (creal(x)) + +#define lv_cimag(x) (cimag(x)) + +#define lv_conj(x) (conj(x)) -#endif +#endif /* __GNUC__ */ +#endif /* __cplusplus */ #endif /* INCLUDE_VOLK_COMPLEX_H */ diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h new file mode 100644 index 000000000..2a7f7e79f --- /dev/null +++ b/volk/include/volk/volk_prefs.h @@ -0,0 +1,25 @@ +#ifndef INCLUDED_VOLK_PREFS_H +#define INCLUDED_VOLK_PREFS_H + +#include <volk/volk_common.h> + +__VOLK_DECL_BEGIN + +struct VOLK_API volk_arch_pref { + char name[128]; + char arch[32]; +}; + +//////////////////////////////////////////////////////////////////////// +// get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +VOLK_API void get_config_path(char *); + +//////////////////////////////////////////////////////////////////////// +// load prefs into global prefs struct +//////////////////////////////////////////////////////////////////////// +VOLK_API int load_preferences(struct volk_arch_pref **); + +__VOLK_DECL_END + +#endif //INCLUDED_VOLK_PREFS_H diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index 6a5fde28f..28ec6ddaa 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -1,23 +1,4 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in -/volk.c 
-/volk_cpu_generic.c -/volk_cpu_powerpc.c -/volk_cpu_x86.c -/volk_environment_init.c -/volk_init.c -/volk_init.h -/volk_mktables -/volk_mktables.c -/volk_proccpu_sim.c -/volk_runtime.c -/test_all +/Makefile.am /testqa diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt new file mode 100644 index 000000000..33a478265 --- /dev/null +++ b/volk/lib/CMakeLists.txt @@ -0,0 +1,261 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Parse the arches xml file: +# Test each arch to see if the compiler supports the flag. +# If the test passes append the arch to the available list. +######################################################################## +#extract the arch lines from the xml file using crazy python +EXECUTE_PROCESS( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch')))" + OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +#This macro sets the ${arch}_flag variable, +#and handles special cases for MSVC arch flags. 
+MACRO(set_arch_flag name flag) + IF(MSVC AND ${name} STREQUAL "mmx") + SET(${name}_flag "/arch:SSE") #no /arch:MMX + ELSEIF(MSVC AND ${name} STREQUAL "sse") + SET(${name}_flag "/arch:SSE") + ELSEIF(MSVC AND ${name} STREQUAL "sse2") + SET(${name}_flag "/arch:SSE2") + ELSE() + SET(${name}_flag -${flag}) + ENDIF() +ENDMACRO(set_arch_flag) + +MACRO(handle_arch name flag) + + #handle special case for none flag + IF(${flag} STREQUAL "none") + SET(have_${name} TRUE) + + #otherwise test the flag against the compiler + ELSE() + INCLUDE(CheckCXXCompilerFlag) + set_arch_flag(${name} ${flag}) + CHECK_CXX_COMPILER_FLAG(${${name}_flag} have_${name}) + ENDIF() + + IF(have_${name}) + LIST(APPEND available_arches ${name}) + ENDIF() +ENDMACRO(handle_arch) + +#create a list of available arches +FOREACH(arch_line ${arch_lines}) + SEPARATE_ARGUMENTS(args UNIX_COMMAND "${arch_line}") + handle_arch(${args}) +ENDFOREACH(arch_line) + +MESSAGE(STATUS "Available arches: ${available_arches}") + +######################################################################## +# Parse the machines xml file: +# Test each machine to see if its arch dependencies are supported. +# Build a list of supported machines and the machine definitions. 
+######################################################################## +#extract the machine lines from the xml file using crazy python +EXECUTE_PROCESS( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine')))" + OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +MACRO(handle_machine1 name) + UNSET(machine_flags) + STRING(TOUPPER LV_MACHINE_${name} machine_def) + + #check if all the arches are supported + FOREACH(arch ${ARGN}) + SET(is_match ${have_${arch}}) + IF(NOT is_match) + SET(is_match FALSE) + BREAK() + ENDIF(NOT is_match) + SET(machine_flags "${machine_flags} ${${arch}_flag}") + ENDFOREACH(arch) + + IF(is_match) + #this is a match, append the source and set its flags + SET(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c) + SET_SOURCE_FILES_PROPERTIES(${machine_source} PROPERTIES COMPILE_FLAGS ${machine_flags}) + LIST(APPEND machine_sources ${machine_source}) + LIST(APPEND machine_defs ${machine_def}) + LIST(APPEND available_machines ${name}) + ENDIF() +ENDMACRO(handle_machine1) + +MACRO(handle_machine name) + SET(arches ${ARGN}) + LIST(FIND arches "32|64" index) + IF(${index} EQUAL -1) + handle_machine1(${name} ${arches}) + ELSE() + LIST(REMOVE_ITEM arches "32|64") + handle_machine1(${name}_32 32 ${arches}) + handle_machine1(${name}_64 64 ${arches}) + ENDIF() +ENDMACRO(handle_machine) + +#setup the available machines +FOREACH(machine_line ${machine_lines}) + SEPARATE_ARGUMENTS(args UNIX_COMMAND "${machine_line}") + handle_machine(${args}) +ENDFOREACH(machine_line) + +MESSAGE(STATUS "Available machines: ${available_machines}") + +######################################################################## +# Create rules to run the volk generator 
+########################################################################
+#list of the generated sources
+SET(volk_gen_sources
+    ${CMAKE_BINARY_DIR}/include/volk/volk.h
+    ${CMAKE_BINARY_DIR}/lib/volk.c
+    ${CMAKE_BINARY_DIR}/lib/volk_init.h
+    ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h
+    ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h
+    ${CMAKE_BINARY_DIR}/lib/volk_cpu.c
+    ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h
+    ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c
+    ${CMAKE_BINARY_DIR}/lib/volk_environment_init.h
+    ${CMAKE_BINARY_DIR}/lib/volk_machines.h
+    ${CMAKE_BINARY_DIR}/lib/volk_machines.c
+    ${machine_sources}
+)
+
+#dependencies are all python, xml, and header implementation files
+FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
+FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
+FILE(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h)
+
+#a single rule produces every generated file: it runs gen/volk_register.py
+#with the top-level build directory as its only argument
+ADD_CUSTOM_COMMAND(
+    OUTPUT ${volk_gen_sources}
+    DEPENDS ${xml_files} ${py_files} ${h_files}
+    COMMAND ${PYTHON_EXECUTABLE} -B
+        ${CMAKE_SOURCE_DIR}/gen/volk_register.py
+        ${CMAKE_BINARY_DIR}
+)
+
+########################################################################
+# Handle orc support
+########################################################################
+FIND_PACKAGE(PkgConfig)
+IF(PKG_CONFIG_FOUND)
+PKG_CHECK_MODULES(ORC "orc-0.4")
+ENDIF(PKG_CONFIG_FOUND)
+
+FIND_PROGRAM(ORCC_EXECUTABLE orcc)
+
+#orc is enabled only when both the orc-0.4 module and the orcc program are found
+IF(ORC_FOUND AND ORCC_EXECUTABLE)
+    #setup orc library usage
+    INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS})
+    LINK_DIRECTORIES(${ORC_LIBRARY_DIRS})
+    ADD_DEFINITIONS(-DLV_HAVE_ORC)
+
+    #setup orc functions
+    FILE(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
+    FOREACH(orc_file ${orc_files})
+
+        #extract the name for the generated c source from the orc file
+        GET_FILENAME_COMPONENT(orc_file_name_we ${orc_file} NAME_WE)
+        SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)
+
+        #create a rule to generate the source and add to the list of sources
+        ADD_CUSTOM_COMMAND(
+            COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file}
+            DEPENDS ${orc_file} OUTPUT ${orcc_gen}
+        )
+        LIST(APPEND volk_sources ${orcc_gen})
+
+    ENDFOREACH(orc_file)
+ELSE()
+    MESSAGE(STATUS "Did not find liborc and orcc, disabling orc support...")
+ENDIF()
+
+########################################################################
+# Setup the volk sources list and library
+########################################################################
+#pass -fvisibility=hidden to the compiler on every non-Windows platform
+IF(NOT WIN32)
+    ADD_DEFINITIONS(-fvisibility=hidden)
+ENDIF()
+
+INCLUDE_DIRECTORIES(
+    ${CMAKE_SOURCE_DIR}/include
+    ${CMAKE_BINARY_DIR}/include
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+#hand-written sources plus everything produced by the generator rule above
+LIST(APPEND volk_sources
+    ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c
+    ${volk_gen_sources}
+)
+
+#set the machine definitions where applicable
+SET_SOURCE_FILES_PROPERTIES(
+    ${CMAKE_CURRENT_BINARY_DIR}/volk.c
+    ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c
+PROPERTIES COMPILE_DEFINITIONS "${machine_defs}")
+
+IF(MSVC)
+    #add compatibility includes for stdint types
+    INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc)
+    #compile the sources as C++ due to the lack of complex.h under MSVC
+    SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX)
+ENDIF(MSVC)
+
+#create the volk runtime library
+ADD_LIBRARY(volk SHARED ${volk_sources})
+TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES})
+SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER})
+SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS")
+
+INSTALL(TARGETS volk
+    LIBRARY DESTINATION lib${LIB_SUFFIX} # .so file
+    ARCHIVE DESTINATION lib${LIB_SUFFIX} # .lib file
+    RUNTIME DESTINATION bin # .dll file
+)
+
+########################################################################
+# Build the QA test application
+########################################################################
+#the QA executable is only built when boost's unit_test_framework is found
+FIND_PACKAGE(Boost COMPONENTS unit_test_framework)
+
+IF(Boost_FOUND)
+
+SET_SOURCE_FILES_PROPERTIES(
${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES + COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN" +) + +INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) +LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) + +ADD_EXECUTABLE(test_all + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) +ADD_TEST(qa_volk_test_all test_all) + +ENDIF() diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am deleted file mode 100644 index 473acd2a6..000000000 --- a/volk/lib/Makefile.am +++ /dev/null @@ -1,158 +0,0 @@ -# -# Copyright 2010,2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# - -include $(top_srcdir)/Makefile.common - -#FIXME: forcing the top_builddir for distcheck seems like a bit -# of a hack. Figure out the right way to do this to find built -# volk_config.h and volk_tables.h - -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ - -I$(top_builddir)/include \ - $(LV_CXXFLAGS) $(WITH_INCLUDES) - - -# We build 1 library and 1 executable here. The library contains -# everything except the QA code. The C++ QA code is especially recommended -# when you have general purpose C or C++ code that may not get -# thoroughly exercised by building and running a GR block. 
The -# executable runs the QA code at "make check" time. -# -# -# -# N.B., If there's a SWIG generated shared library and associated -# python code, it will be contained in ../python, not here. (That -# code is conditionally built depending on the state of the -# --without-python configure option.) However, the .i should be here -# next to the .h that it's based on. - - -# list of programs run by "make check" and "make distcheck" -#TESTS = testqa -#orc stuff gets built in the ORC directory conditional to ORC being enabled. -#it gets linked in during the build of libvolk as an added library. -#there might be a better way to do this. - -lib_LTLIBRARIES = \ - libvolk.la \ - libvolk_runtime.la - -EXTRA_DIST = \ - volk_mktables.c \ - volk_rank_archs.h \ - volk_proccpu_sim.c \ - gcc_x86_cpuid.h - -# ---------------------------------------------------------------- -# The main library -# ---------------------------------------------------------------- - -libvolk_runtime_la_SOURCES = \ - $(platform_CODE) \ - volk_runtime.c \ - volk_init.c \ - volk_rank_archs.c - -libvolk_la_SOURCES = \ - $(platform_CODE) \ - volk.c \ - volk_environment_init.c - -volk_orc_LDFLAGS = \ - $(ORC_LDFLAGS) \ - -lorc-0.4 - -volk_orc_LIBADD = \ - ../orc/libvolk_orc.la - -if LV_HAVE_ORC -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_la_LIBADD = $(volk_orc_LIBADD) -else -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_la_LIBADD = -endif - - -# ---------------------------------------------------------------- -# The QA library. 
Note libvolk.la in LIBADD -# ---------------------------------------------------------------- -#libvolk_qa_la_SOURCES = \ -# qa_utils.cc - -#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost - -#libvolk_qa_la_LIBADD = \ -# libvolk.la \ -# libvolk_runtime.la - -# ---------------------------------------------------------------- -# headers that don't get installed -# ---------------------------------------------------------------- -noinst_HEADERS = \ - volk_init.h \ - qa_utils.h - -# ---------------------------------------------------------------- -# Our test program -# ---------------------------------------------------------------- -noinst_PROGRAMS = \ - testqa - -testqa_SOURCES = testqa.cc qa_utils.cc -testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) \ - $(BOOST_CPPFLAGS) -testqa_LDFLAGS = $(BOOST_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) -if LV_HAVE_ORC -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la \ - ../orc/libvolk_orc.la -else -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la -endif - -distclean-local: - rm -f volk.c - rm -f volk_cpu_generic.c - rm -f volk_cpu_powerpc.c - rm -f volk_cpu_x86.c - rm -f volk_init.c - rm -f volk_init.h - rm -f volk_mktables.c - rm -f volk_proccpu_sim.c - rm -f volk_runtime.c - rm -f volk_tables.h - rm -f volk_environment_init.c -#SUBDIRS = - -#ifdef BUILD_SSE -#SUBDIRS += sse -#elif BUILD_SPU -#SUBDIRS += spu -#else -#SUBDIRS += port -#endif - - diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index 154aa0f17..5d5eb7e18 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() { double total; const int vlen = 3200; const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned 
(16))); - short input4[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + __VOLK_ATTR_ALIGNED(16) short input4[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); - short output2[vlen] __attribute__ ((aligned (16))); - short output3[vlen] __attribute__ ((aligned (16))); - short output01[vlen] __attribute__ ((aligned (16))); - short output11[vlen] __attribute__ ((aligned (16))); - short output21[vlen] __attribute__ ((aligned (16))); - short output31[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + __VOLK_ATTR_ALIGNED(16) short output2[vlen]; + __VOLK_ATTR_ALIGNED(16) short output3[vlen]; + __VOLK_ATTR_ALIGNED(16) short output01[vlen]; + __VOLK_ATTR_ALIGNED(16) short output11[vlen]; + __VOLK_ATTR_ALIGNED(16) short output21[vlen]; + __VOLK_ATTR_ALIGNED(16) short output31[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index 62deffaeb..2e6e6a1a0 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short target3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short target3[vlen]; - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; 
+ __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; - short cntl0[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl1[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl2[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; - short cntl3[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 819b2256b..3cd4e906d 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ 
-23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))); - short cntl0[vlen] __attribute__ ((aligned (16))); - short cntl1[vlen] __attribute__ ((aligned (16))); - short cntl2[vlen] __attribute__ ((aligned (16))); - short cntl3[vlen] __attribute__ ((aligned (16))); - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; for(int i = 0; i < vlen; ++i) { src0[i] = i; diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 66f8c9afa..192a69e35 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1() { const int vlen = 34; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + 
__VOLK_ATTR_ALIGNED(16) short output1[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = (short) (rand() - (RAND_MAX/2)); diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index 592304f83..a2e7a85be 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float input0[vlen]; - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; + __VOLK_ATTR_ALIGNED(16) float output01[vlen]; for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index a3d0955bd..981bb19e6 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen]; - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; + __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; const float scalar = vlen; const float rbw = 1.7; diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index 618a82a02..c880260f2 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() { double total; const int 
ITERS = 10000000; - uint32_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t input0; - uint32_t output0 __attribute__ ((aligned (16))); - uint32_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t output0; + __VOLK_ATTR_ALIGNED(16) uint32_t output01; input0 = ((uint32_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index 85ef58795..6be4e50ea 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint64_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t input0; - uint64_t output0 __attribute__ ((aligned (16))); - uint64_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t output0; + __VOLK_ATTR_ALIGNED(16) uint64_t output01; input0 = ((uint64_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index b0f63d2b5..7f86dd78b 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -3,16 +3,16 @@ #include <boost/foreach.hpp> #include <boost/assign/list_of.hpp> #include <boost/tokenizer.hpp> -//#include <boost/test/unit_test.hpp> #include <iostream> #include <vector> #include <list> #include <ctime> #include <cmath> +#include <limits> #include <boost/lexical_cast.hpp> -//#include <volk/volk_runtime.h> -#include <volk/volk_registry.h> #include <volk/volk.h> +#include <volk/volk_cpu.h> +#include <volk/volk_common.h> #include <boost/typeof/typeof.hpp> #include <boost/type_traits.hpp> @@ -62,50 +62,14 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } } -static std::vector<std::string> get_arch_list(const int archs[]) { +static std::vector<std::string> get_arch_list(struct volk_func_desc desc) { std::vector<std::string> archlist; - int num_archs = archs[0]; - - //there has got to be a 
way to query these arches - for(int i = 0; i < num_archs; i++) { - switch(archs[i+1]) { - case (1<<LV_GENERIC): - archlist.push_back("generic"); - break; - case (1<<LV_ORC): - archlist.push_back("orc"); - break; - case (1<<LV_SSE): - archlist.push_back("sse"); - break; - case (1<<LV_SSE2): - archlist.push_back("sse2"); - break; - case (1<<LV_SSE3): - archlist.push_back("sse3"); - break; - case (1<<LV_SSSE3): - archlist.push_back("ssse3"); - break; - case (1<<LV_SSE4_1): - archlist.push_back("sse4_1"); - break; - case (1<<LV_SSE4_2): - archlist.push_back("sse4_2"); - break; - case (1<<LV_SSE4_A): - archlist.push_back("sse4_a"); - break; - case (1<<LV_MMX): - archlist.push_back("mmx"); - break; - case (1<<LV_AVX): - archlist.push_back("avx"); - break; - default: - break; - } + + for(int i = 0; i < desc.n_archs; i++) { + //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc + archlist.push_back(std::string(desc.indices[i])); } + return archlist; } @@ -256,7 +220,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i<vlen; i++) { - if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) { + if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl; @@ -269,7 +233,8 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { class volk_qa_aligned_mem_pool{ public: - void *get_new(size_t size, size_t alignment = 16){ + void *get_new(size_t size){ + size_t alignment = volk_get_alignment(); _mems.push_back(std::vector<char>(size + alignment-1, 0)); size_t ptr = size_t(&_mems.back().front()); return (void *)((ptr + alignment-1) & ~(alignment-1)); @@ -277,11 +242,19 @@ public: private: std::list<std::vector<char> > _mems; }; -bool run_volk_tests(const int archs[], void 
(*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector<std::string> *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test - std::vector<std::string> arch_list = get_arch_list(archs); + std::vector<std::string> arch_list = get_arch_list(desc); if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; @@ -334,6 +307,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //now run the test clock_t start, end; + std::vector<double> profile_times; for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -368,8 +342,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; @@ -381,7 +359,9 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, bool fail = false; bool fail_global = false; + std::vector<bool> arch_results; for(int i=0; i<arch_list.size(); i++) { + fail = false; if(i != generic_offset) { for(int j=0; j<both_sigs.size(); j++) { if(both_sigs[j].is_float) { @@ -432,6 +412,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 
2:1)); } } + arch_results.push_back(!fail); + } + + double best_time = std::numeric_limits<double>::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 1b64bacaa..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,9 @@ #include <cstdlib> #include <string> +#include <vector> +#include <volk/volk.h> +#include <volk/volk_common.h> struct volk_type_t { bool is_float; @@ -18,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *); +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); diff --git a/volk/lib/testqa.cc 
b/volk/lib/testqa.cc index 779bc61eb..62e62c2f4 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -1,100 +1,93 @@ #include "qa_utils.h" #include <volk/volk.h> -#include <volk/volk_registry.h> #include <boost/test/unit_test.hpp> -BOOST_AUTO_TEST_CASE(volk_test_all) { - //in order... -// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); -// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 
10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); 
-// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 20460, 1000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 20460, 100); +VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 20460, 1000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 20460, 10000); +//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 
2046, 1000); +//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 1000); +VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 50); +VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 100); +//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 100); +VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 20460, 10000); 
+//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 2046, 100); +VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 20460, 5000); +//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 0, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 3000); +VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 100); +VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 100); +VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 20460, 3000); +VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 20460, 3000); +VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 20460, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 20460, 10000); +VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 20460, 2000); +//VOLK_RUN_TESTS(volk_32u_popcnt_a, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 20460, 10000); 
+VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 20460, 1000);
+//VOLK_RUN_TESTS(volk_64u_popcnt_a, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 20460, 400);
+VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 20460, 400);
+VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 20000);
+VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 2000);
-}
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
new file mode 100644
index 000000000..9743c51d9
--- /dev/null
+++ b/volk/lib/volk_prefs.c
@@ -0,0 +1,80 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <volk/volk_prefs.h>
+
+//#if defined(_WIN32)
+//#include <Windows.h>
+//#endif
+
+//Write the per-user config file path ($HOME/.volk/volk_config) into path.
+//If HOME is unset, path becomes the empty string so the caller's fopen()
+//fails cleanly instead of strcpy() dereferencing a NULL pointer.
+//NOTE(review): no buffer size is passed in, so a very long HOME can still
+//overflow path; the caller in this file supplies 512 bytes.
+void get_config_path(char *path) {
+    const char *suffix = "/.volk/volk_config";
+    const char *home = getenv("HOME");
+    if(home == NULL) {
+        path[0] = '\0';
+        return;
+    }
+    strcpy(path, home);
+    strcat(path, suffix);
+}
+
+//Load the per-function architecture preferences from the config file.
+//A line counts when it holds two whitespace-separated tokens and the first
+//starts with "volk_"; the tokens are stored as the entry's name and arch.
+//On return *prefs is a malloc'd array of the entries, or NULL when none were
+//loaded (no file, no matching line, or allocation failure).
+//Returns the number of entries stored in *prefs.
+int load_preferences(struct volk_arch_pref **prefs) {
+    FILE *config_file;
+    char path[512], line[512], function[128], arch[32];
+    int n_arch_prefs = 0;
+    int n_stored = 0;
+    struct volk_arch_pref *t_pref;
+
+    (*prefs) = NULL;
+
+    //get the config path
+    get_config_path(path);
+    config_file = fopen(path, "r");
+    if(!config_file) return 0; //no prefs found
+
+    //first pass: count the matching lines; the field widths keep sscanf inside
+    //function[128] and arch[32]
+    while(fgets(line, sizeof(line), config_file) != NULL) {
+        if(sscanf(line, "%127s %31s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+            n_arch_prefs++;
+        }
+    }
+
+    if(n_arch_prefs == 0) { //nothing to store; avoid a zero-byte malloc
+        fclose(config_file);
+        return 0;
+    }
+
+    //now allocate the memory required for volk_arch_prefs
+    (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref));
+    if((*prefs) == NULL) {
+        fclose(config_file);
+        return 0;
+    }
+    t_pref = (*prefs);
+
+    //reset the file pointer and write the prefs into volk_arch_prefs;
+    //n_stored caps the copy in case the file grew between the two passes
+    rewind(config_file);
+    while(n_stored < n_arch_prefs && fgets(line, sizeof(line), config_file) != NULL) {
+        if(sscanf(line, "%127s %31s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+            strncpy(t_pref->name, function, 128);
+            strncpy(t_pref->arch, arch, 32);
+            t_pref++;
+            n_stored++;
+        }
+    }
+    fclose(config_file);
+    return n_stored;
+}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index b1a93db26..e10433fd0 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -1,10 +1,50 @@
-#include<volk_rank_archs.h>
-#include<stdio.h>
+#include <volk_rank_archs.h>
+#include <volk/volk_prefs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
-unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) {
-  int i = 2;
+//Return the position of arch_name within indices[0..n_archs-1].
+//When arch_name is absent a warning is printed and the position of the
+//"generic" entry is returned instead, or 0 if that is absent as well.
+unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) {
+  unsigned int i;
+  for(i=0; i<n_archs; i++) {
+    if(!strncmp(indices[i], arch_name, 20)) {
+      return i;
+    }
+  }
+  //something terrible should happen here
+  printf("Volk warning: no arch found, returning generic impl\n");
+  //but we'll fake it for now: search for "generic" with a plain loop, since
+  //recursing here never terminated when the table had no "generic" entry
+  for(i=0; i<n_archs; i++) {
+    if(!strncmp(indices[i], "generic", 20)) {
+      return i;
+    }
+  }
+  return 0; //no "generic" entry either; fall back to the first slot
+}
+
+unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) {
+  int i;
   unsigned int best_val = 0;
-  for(; i < arch_defs[0] + 1; ++i) {
+  static struct volk_arch_pref *volk_arch_prefs;
+  static int n_arch_prefs = 0;
+  static int prefs_loaded = 0;
+  if(!prefs_loaded) {
+    n_arch_prefs = load_preferences(&volk_arch_prefs);
+    prefs_loaded = 1;
+  }
+
+  //now look for the function name in the prefs list
+  for(i=0; i < n_arch_prefs; i++) {
+    if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it
+      return get_index(indices,
n_archs, volk_arch_prefs[i].arch); + } + } + + for(i=1; i < n_archs; ++i) { if((arch_defs[i]&(!arch)) == 0) { best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 26b9f7503..546240d2c 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,8 +5,8 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch); - +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } diff --git a/volk/msvc/inttypes.h b/volk/msvc/inttypes.h new file mode 100644 index 000000000..1c2baa82e --- /dev/null +++ b/volk/msvc/inttypes.h @@ -0,0 +1,301 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <stdint.h> + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define 
PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // 
_WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + 
imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/volk/msvc/stdint.h b/volk/msvc/stdint.h new file mode 100644 index 000000000..ab6d37e11 --- /dev/null +++ b/volk/msvc/stdint.h @@ -0,0 +1,251 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#ifndef INTMAX_C +#define INTMAX_C INT64_C +#endif +#ifndef UINTMAX_C +#define UINTMAX_C UINT64_C +#endif + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am index 6b5e4f8b6..814471bd0 100644 --- a/volk/orc/Makefile.am +++ b/volk/orc/Makefile.am @@ -21,31 +21,31 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(ORC_CFLAGS) include $(top_srcdir)/Makefile.common -lib_LTLIBRARIES = libvolk_orc.la +noinst_LTLIBRARIES = libvolk_orc.la libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS) libvolk_orc_la_SOURCES = \ -volk_8i_convert_16i_a16_orc_impl.orc \ -volk_8i_s32f_convert_32f_a16_orc_impl.orc \ -volk_16u_byteswap_a16_orc_impl.orc \ -volk_32i_x2_and_32i_a16_orc_impl.orc \ -volk_32i_x2_or_32i_a16_orc_impl.orc \ -volk_32f_x2_add_32f_a16_orc_impl.orc \ -volk_32f_x2_subtract_32f_a16_orc_impl.orc \ -volk_32f_x2_divide_32f_a16_orc_impl.orc \ -volk_32f_x2_multiply_32f_a16_orc_impl.orc \ -volk_32fc_x2_multiply_32fc_a16_orc_impl.orc \ -volk_32fc_32f_multiply_32fc_a16_orc_impl.orc \ -volk_32f_sqrt_32f_a16_orc_impl.orc \ -volk_32f_x2_max_32f_a16_orc_impl.orc \ -volk_32f_x2_min_32f_a16_orc_impl.orc \ 
-volk_32f_s32f_normalize_a16_orc_impl.orc \ -volk_32fc_magnitude_32f_a16_orc_impl.orc \ -volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc \ -volk_16ic_magnitude_16i_a16_orc_impl.orc \ -volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc \ -volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc \ -volk_16ic_deinterleave_real_8i_a16_orc_impl.orc +volk_8i_convert_16i_a_orc_impl.orc \ +volk_8i_s32f_convert_32f_a_orc_impl.orc \ +volk_16u_byteswap_a_orc_impl.orc \ +volk_32i_x2_and_32i_a_orc_impl.orc \ +volk_32i_x2_or_32i_a_orc_impl.orc \ +volk_32f_x2_add_32f_a_orc_impl.orc \ +volk_32f_x2_subtract_32f_a_orc_impl.orc \ +volk_32f_x2_divide_32f_a_orc_impl.orc \ +volk_32f_x2_multiply_32f_a_orc_impl.orc \ +volk_32fc_x2_multiply_32fc_a_orc_impl.orc \ +volk_32fc_32f_multiply_32fc_a_orc_impl.orc \ +volk_32f_sqrt_32f_a_orc_impl.orc \ +volk_32f_x2_max_32f_a_orc_impl.orc \ +volk_32f_x2_min_32f_a_orc_impl.orc \ +volk_32f_s32f_normalize_a_orc_impl.orc \ +volk_32fc_magnitude_32f_a_orc_impl.orc \ +volk_32fc_s32f_magnitude_16i_a_orc_impl.orc \ +volk_16ic_magnitude_16i_a_orc_impl.orc \ +volk_16ic_deinterleave_16i_x2_a_orc_impl.orc \ +volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc \ +volk_16ic_deinterleave_real_8i_a_orc_impl.orc diff --git a/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc index 0189fbf5d..fd8915da0 100644 --- a/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc +++ b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl +.function volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl .dest 4 idst .dest 4 qdst .source 4 src diff --git a/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc index 56018edda..76faa936a 100644 --- a/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc +++ b/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function 
volk_16ic_deinterleave_16i_x2_a16_orc_impl +.function volk_16ic_deinterleave_16i_x2_a_orc_impl .dest 2 idst .dest 2 qdst .source 4 src diff --git a/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc index dba9a4c8e..8db49fd7c 100644 --- a/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc +++ b/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_16ic_deinterleave_real_8i_a16_orc_impl +.function volk_16ic_deinterleave_real_8i_a_orc_impl .dest 1 dst .source 4 src .temp 2 iw diff --git a/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc index 37225e9b8..fbaebc46d 100644 --- a/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc +++ b/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_16ic_magnitude_16i_a16_orc_impl +.function volk_16ic_magnitude_16i_a_orc_impl .source 4 src .dest 2 dst .floatparam 4 scalar diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc index 1e2380837..66fef7d2e 100644 --- a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc +++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_16ic_magnitude_32f_a16_orc_impl +.function volk_16ic_magnitude_32f_a_orc_impl .source 4 src .dest 4 dst .floatparam 4 scalar diff --git a/volk/orc/volk_16u_byteswap_a16_orc_impl.orc b/volk/orc/volk_16u_byteswap_a16_orc_impl.orc deleted file mode 100644 index c1c8ee59e..000000000 --- a/volk/orc/volk_16u_byteswap_a16_orc_impl.orc +++ /dev/null @@ -1,3 +0,0 @@ -.function volk_16u_byteswap_a16_orc_impl -.dest 2 dst -swapw dst, dst diff --git a/volk/orc/volk_16u_byteswap_a_orc_impl.orc b/volk/orc/volk_16u_byteswap_a_orc_impl.orc new file mode 100644 index 000000000..b96ba84af --- /dev/null +++ b/volk/orc/volk_16u_byteswap_a_orc_impl.orc @@ -0,0 +1,3 @@ +.function 
volk_16u_byteswap_a_orc_impl +.dest 2 dst +swapw dst, dst diff --git a/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc b/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc index acd319b16..986fdf665 100644 --- a/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc +++ b/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_s32f_normalize_a16_orc_impl +.function volk_32f_s32f_normalize_a_orc_impl .source 4 src1 .floatparam 4 invscalar .dest 4 dst diff --git a/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc b/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc index ae5680f15..f339b1122 100644 --- a/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_sqrt_32f_a16_orc_impl +.function volk_32f_sqrt_32f_a_orc_impl .source 4 src .dest 4 dst sqrtf dst, src diff --git a/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc index 8d095a052..450cc6a9e 100644 --- a/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_x2_add_32f_a16_orc_impl +.function volk_32f_x2_add_32f_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc index 0097646cb..ee3b61b82 100644 --- a/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_x2_divide_32f_a16_orc_impl +.function volk_32f_x2_divide_32f_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc b/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc new file mode 100644 index 000000000..b367f3091 --- /dev/null +++ b/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc @@ -0,0 +1,6 @@ +.function volk_32f_x2_dot_prod_32f_a_orc_impl +.source 4 src1 +.source 4 src2 +.dest 4 dst +.accumulator 4 accum 
+addf dst, src1, src2 diff --git a/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc index b7f008737..725201633 100644 --- a/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_x2_max_32f_a16_orc_impl +.function volk_32f_x2_max_32f_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc index 78328b576..a71ed8250 100644 --- a/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_x2_min_32f_a16_orc_impl +.function volk_32f_x2_min_32f_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc index e8fadff19..c17d539fd 100644 --- a/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_x2_multiply_32f_a16_orc_impl +.function volk_32f_x2_multiply_32f_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc index 13fbe8c83..b3b0f256e 100644 --- a/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32f_x2_subtract_32f_a16_orc_impl +.function volk_32f_x2_subtract_32f_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc index 455293cff..aa82699f5 100644 --- a/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc +++ b/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32fc_32f_multiply_32fc_a16_orc_impl +.function 
volk_32fc_32f_multiply_32fc_a_orc_impl .source 8 src1 .source 4 src2 .dest 8 dst diff --git a/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc index c5e2e57f1..032ab2b1b 100644 --- a/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc +++ b/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32fc_magnitude_32f_a16_orc_impl +.function volk_32fc_magnitude_32f_a_orc_impl .source 8 src .dest 4 dst .temp 8 iqf diff --git a/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc index 6116f5e1f..505e73f5d 100644 --- a/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc +++ b/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32fc_s32f_magnitude_16i_a16_orc_impl +.function volk_32fc_s32f_magnitude_16i_a_orc_impl .source 8 src .dest 2 dst .floatparam 4 scalar diff --git a/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc index a27d722cd..cb8a12d81 100644 --- a/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc +++ b/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32fc_x2_multiply_32fc_a16_orc_impl +.function volk_32fc_x2_multiply_32fc_a_orc_impl .source 8 src1 .source 8 src2 .dest 8 dst diff --git a/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc index 7b331f8ed..1845e4654 100644 --- a/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc +++ b/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_32i_x2_and_32i_a16_orc_impl +.function volk_32i_x2_and_32i_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc index 4984a9ced..004663f42 100644 --- a/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc +++ b/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc @@ -1,4 +1,4 @@ 
-.function volk_32i_x2_or_32i_a16_orc_impl +.function volk_32i_x2_or_32i_a_orc_impl .dest 4 dst .source 4 src1 .source 4 src2 diff --git a/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc b/volk/orc/volk_8i_convert_16i_a_orc_impl.orc index f44845c88..d813c6cfa 100644 --- a/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc +++ b/volk/orc/volk_8i_convert_16i_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_8i_convert_16i_a16_orc_impl +.function volk_8i_convert_16i_a_orc_impl .source 1 src .dest 2 dst convsbw dst, src diff --git a/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc b/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc index 8f6e157e9..ad54fb1e1 100644 --- a/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc +++ b/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc @@ -1,4 +1,4 @@ -.function volk_8i_s32f_convert_32f_a16_orc_impl +.function volk_8i_s32f_convert_32f_a_orc_impl .source 1 src .dest 4 dst .floatparam 4 scalar diff --git a/volk/volk.pc.in b/volk/volk.pc.in index 85425ba64..58e976786 100644 --- a/volk/volk.pc.in +++ b/volk/volk.pc.in @@ -5,11 +5,10 @@ includedir=@includedir@ LV_CXXFLAGS=@LV_CXXFLAGS@ - Name: volk -Description: VOLK.. Vector Optimized Library of Kernels +Description: VOLK: Vector Optimized Library of Kernels Requires: Version: @VERSION@ -Libs: -lvolk -lvolk_runtime @LV_ORC_PKGCONFIG@ +Libs: -lvolk Cflags: -I${includedir} ${LV_CXXFLAGS} |