diff options
Diffstat (limited to 'volk/include')
-rw-r--r-- | volk/include/volk/.gitignore | 18 | ||||
-rw-r--r-- | volk/include/volk/Makefile.am | 202 | ||||
-rw-r--r-- | volk/include/volk/archs.xml | 139 | ||||
-rw-r--r-- | volk/include/volk/emit_omnilog.py | 13 | ||||
-rw-r--r-- | volk/include/volk/make_c.py | 73 | ||||
-rw-r--r-- | volk/include/volk/make_config_fixed.py | 21 | ||||
-rw-r--r-- | volk/include/volk/make_config_in.py | 13 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_generic_c.py | 60 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_h.py | 48 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_powerpc_c.py | 67 | ||||
-rw-r--r-- | volk/include/volk/make_cpuid_x86_c.py | 133 | ||||
-rw-r--r-- | volk/include/volk/make_environment_init_c.py | 33 | ||||
-rw-r--r-- | volk/include/volk/make_environment_init_h.py | 18 | ||||
-rw-r--r-- | volk/include/volk/make_h.py | 28 | ||||
-rw-r--r-- | volk/include/volk/make_init_c.py | 42 | ||||
-rw-r--r-- | volk/include/volk/make_init_h.py | 26 | ||||
-rw-r--r-- | volk/include/volk/make_mktables.py | 33 | ||||
-rw-r--r-- | volk/include/volk/make_proccpu_sim.py | 47 | ||||
-rw-r--r-- | volk/include/volk/make_registry.py | 62 | ||||
-rw-r--r-- | volk/include/volk/make_runtime.py | 34 | ||||
-rw-r--r-- | volk/include/volk/make_runtime_c.py | 47 | ||||
-rw-r--r-- | volk/include/volk/make_set_simd.py | 272 | ||||
-rw-r--r-- | volk/include/volk/make_typedefs.py | 23 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_branch_4_state_8_a.h (renamed from volk/include/volk/volk_16i_branch_4_state_8_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_convert_8i_a.h (renamed from volk/include/volk/volk_16i_convert_8i_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_convert_8i_u.h | 2 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_max_star_16i_a.h (renamed from volk/include/volk/volk_16i_max_star_16i_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_max_star_horizontal_16i_a.h (renamed from volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_permute_and_scalar_add_a.h (renamed from volk/include/volk/volk_16i_permute_and_scalar_add_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_s32f_convert_32f_a.h (renamed from volk/include/volk/volk_16i_s32f_convert_32f_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_s32f_convert_32f_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h (renamed from volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h (renamed from volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h (renamed from volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_real_16i_a.h (renamed from volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_deinterleave_real_8i_a.h (renamed from volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_magnitude_16i_a.h (renamed from volk/include/volk/volk_16ic_magnitude_16i_a16.h) | 35 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h (renamed from volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h (renamed from volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h (renamed from volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h) | 31 | ||||
-rw-r--r-- | volk/include/volk/volk_16u_byteswap_a.h (renamed from volk/include/volk/volk_16u_byteswap_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_accumulator_s32f_a.h (renamed from volk/include/volk/volk_32f_accumulator_s32f_a16.h) | 17 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_convert_64f_a.h (renamed from volk/include/volk/volk_32f_convert_64f_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_convert_64f_u.h | 2 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_index_max_16u_a.h (renamed from volk/include/volk/volk_32f_index_max_16u_a16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_16i_a.h (renamed from volk/include/volk/volk_32f_s32f_convert_16i_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_16i_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_32i_a.h (renamed from volk/include/volk/volk_32f_s32f_convert_32i_a16.h) | 55 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_32i_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_8i_a.h (renamed from volk/include/volk/volk_32f_s32f_convert_8i_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_convert_8i_u.h | 6 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_normalize_a.h (renamed from volk/include/volk/volk_32f_s32f_normalize_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_power_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_power_32f_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_s32f_stddev_32f_a.h (renamed from volk/include/volk/volk_32f_s32f_stddev_32f_a16.h) | 23 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_sqrt_32f_a.h (renamed from volk/include/volk/volk_32f_sqrt_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h (renamed from volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_add_32f_a.h (renamed from volk/include/volk/volk_32f_x2_add_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_divide_32f_a.h (renamed from volk/include/volk/volk_32f_x2_divide_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_dot_prod_32f_a.h (renamed from volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h) | 29 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_dot_prod_32f_u.h | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_interleave_32fc_a.h (renamed from volk/include/volk/volk_32f_x2_interleave_32fc_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_max_32f_a.h (renamed from volk/include/volk/volk_32f_x2_max_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_min_32f_a.h (renamed from volk/include/volk/volk_32f_x2_min_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_multiply_32f_a.h (renamed from volk/include/volk/volk_32f_x2_multiply_32f_a16.h) | 61 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h (renamed from volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x2_subtract_32f_a.h (renamed from volk/include/volk/volk_32f_x2_subtract_32f_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h (renamed from volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_32f_multiply_32fc_a.h (renamed from volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_real_32f_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_deinterleave_real_64f_a.h (renamed from volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_index_max_16u_a.h (renamed from volk/include/volk/volk_32fc_index_max_16u_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_magnitude_32f_a.h (renamed from volk/include/volk/volk_32fc_magnitude_32f_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_atan2_32f_a.h (renamed from volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h (renamed from volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h) | 17 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h (renamed from volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h) | 31 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_power_32fc_a.h (renamed from volk/include/volk/volk_32fc_s32f_power_32fc_a16.h) | 36 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h (renamed from volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h (renamed from volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h) | 18 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h (renamed from volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h) | 19 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h (renamed from volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h) | 27 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_multiply_32fc_a.h (renamed from volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h (renamed from volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_x2_square_dist_32f_a.h (renamed from volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_s32f_convert_32f_a.h (renamed from volk/include/volk/volk_32i_s32f_convert_32f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_s32f_convert_32f_u.h | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_x2_and_32i_a.h (renamed from volk/include/volk/volk_32i_x2_and_32i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32i_x2_or_32i_a.h (renamed from volk/include/volk/volk_32i_x2_or_32i_a16.h) | 22 | ||||
-rw-r--r-- | volk/include/volk/volk_32u_byteswap_a.h (renamed from volk/include/volk/volk_32u_byteswap_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32u_popcnt_a.h (renamed from volk/include/volk/volk_32u_popcnt_a16.h) | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_convert_32f_a.h (renamed from volk/include/volk/volk_64f_convert_32f_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_convert_32f_u.h | 2 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_x2_max_64f_a.h (renamed from volk/include/volk/volk_64f_x2_max_64f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_64f_x2_min_64f_a.h (renamed from volk/include/volk/volk_64f_x2_min_64f_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_64u_byteswap_a.h (renamed from volk/include/volk/volk_64u_byteswap_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_64u_popcnt_a.h (renamed from volk/include/volk/volk_64u_popcnt_a16.h) | 12 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_convert_16i_a.h (renamed from volk/include/volk/volk_8i_convert_16i_a16.h) | 20 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_convert_16i_u.h | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_s32f_convert_32f_a.h (renamed from volk/include/volk/volk_8i_s32f_convert_32f_a16.h) | 20 | ||||
-rw-r--r-- | volk/include/volk/volk_8i_s32f_convert_32f_u.h | 4 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h (renamed from volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_real_16i_a.h (renamed from volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h) | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_deinterleave_real_8i_a.h (renamed from volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h) | 8 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h (renamed from volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h (renamed from volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h) | 21 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h (renamed from volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h) | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h (renamed from volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h) | 26 | ||||
-rw-r--r-- | volk/include/volk/volk_common.h | 92 | ||||
-rw-r--r-- | volk/include/volk/volk_complex.h | 79 | ||||
-rw-r--r-- | volk/include/volk/volk_prefs.h | 25 | ||||
-rw-r--r-- | volk/include/volk/volk_regexp.py | 8 | ||||
-rwxr-xr-x | volk/include/volk/volk_register.py | 278 |
116 files changed, 1096 insertions, 2458 deletions
diff --git a/volk/include/volk/.gitignore b/volk/include/volk/.gitignore index be8358f3a..b336cc7ce 100644 --- a/volk/include/volk/.gitignore +++ b/volk/include/volk/.gitignore @@ -1,20 +1,2 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in -/volk.h -/volk_config.h -/volk_config_fixed.h -/volk_cpu.h -/volk_environment_init.h -/volk_registry.h -/volk_runtime.h -/volk_tables.h -/volk_typedefs.h -/volk_mktables diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 7a5edd624..b7da9b37c 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -20,142 +20,104 @@ include $(top_srcdir)/Makefile.common -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ $(LV_CXXFLAGS) $(WITH_INCLUDES) volkincludedir = $(prefix)/include/volk -BUILT_SOURCES: \ - volk_config.h \ - volk_tables.h - volkinclude_HEADERS = \ volk_complex.h \ volk_common.h \ - volk_config_fixed.h \ - volk_runtime.h \ - volk_config.h \ - volk_tables.h \ - volk_typedefs.h \ - volk_registry.h \ - volk.h \ - volk_cpu.h \ - volk_environment_init.h \ - volk_16i_x5_add_quad_16i_x4_a16.h \ - volk_16i_branch_4_state_8_a16.h \ - volk_16ic_deinterleave_16i_x2_a16.h \ - volk_16ic_s32f_deinterleave_32f_x2_a16.h \ - volk_16ic_deinterleave_real_16i_a16.h \ - volk_16ic_s32f_deinterleave_real_32f_a16.h \ - volk_16ic_deinterleave_real_8i_a16.h \ - volk_16ic_magnitude_16i_a16.h \ - volk_16ic_s32f_magnitude_32f_a16.h \ - volk_16i_s32f_convert_32f_a16.h \ + volk_prefs.h \ + $(top_gendir)/include/volk/volk_config_fixed.h \ + $(top_gendir)/include/volk/volk_typedefs.h \ + $(top_gendir)/include/volk/volk.h \ + $(top_gendir)/include/volk/volk_cpu.h \ + volk_16i_x5_add_quad_16i_x4_a.h \ + volk_16i_branch_4_state_8_a.h \ + volk_16ic_deinterleave_16i_x2_a.h \ + volk_16ic_s32f_deinterleave_32f_x2_a.h \ + volk_16ic_deinterleave_real_16i_a.h \ + volk_16ic_s32f_deinterleave_real_32f_a.h \ + 
volk_16ic_deinterleave_real_8i_a.h \ + volk_16ic_magnitude_16i_a.h \ + volk_16ic_s32f_magnitude_32f_a.h \ + volk_16i_s32f_convert_32f_a.h \ volk_16i_s32f_convert_32f_u.h \ - volk_16i_convert_8i_a16.h \ + volk_16i_convert_8i_a.h \ volk_16i_convert_8i_u.h \ - volk_16i_max_star_16i_a16.h \ - volk_16i_max_star_horizontal_16i_a16.h \ - volk_16i_permute_and_scalar_add_a16.h \ - volk_16i_x4_quad_max_star_16i_a16.h \ - volk_16u_byteswap_a16.h \ - volk_32f_accumulator_s32f_a16.h \ - volk_32f_x2_add_32f_a16.h \ - volk_32fc_32f_multiply_32fc_a16.h \ - volk_32fc_s32f_power_32fc_a16.h \ - volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h \ - volk_32fc_s32f_atan2_32f_a16.h \ - volk_32fc_x2_conjugate_dot_prod_32fc_a16.h \ + volk_16i_max_star_16i_a.h \ + volk_16i_max_star_horizontal_16i_a.h \ + volk_16i_permute_and_scalar_add_a.h \ + volk_16i_x4_quad_max_star_16i_a.h \ + volk_16u_byteswap_a.h \ + volk_32f_accumulator_s32f_a.h \ + volk_32f_x2_add_32f_a.h \ + volk_32fc_32f_multiply_32fc_a.h \ + volk_32fc_s32f_power_32fc_a.h \ + volk_32f_s32f_calc_spectral_noise_floor_32f_a.h \ + volk_32fc_s32f_atan2_32f_a.h \ + volk_32fc_x2_conjugate_dot_prod_32fc_a.h \ volk_32fc_x2_conjugate_dot_prod_32fc_u.h \ - volk_32fc_deinterleave_32f_x2_a16.h \ - volk_32fc_deinterleave_64f_x2_a16.h \ - volk_32fc_s32f_deinterleave_real_16i_a16.h \ - volk_32fc_deinterleave_real_32f_a16.h \ - volk_32fc_deinterleave_real_64f_a16.h \ - volk_32fc_x2_dot_prod_32fc_a16.h \ - volk_32fc_index_max_16u_a16.h \ - volk_32fc_s32f_magnitude_16i_a16.h \ - volk_32fc_magnitude_32f_a16.h \ - volk_32fc_x2_multiply_32fc_a16.h \ - volk_32f_s32f_convert_16i_a16.h \ + volk_32fc_deinterleave_32f_x2_a.h \ + volk_32fc_deinterleave_64f_x2_a.h \ + volk_32fc_s32f_deinterleave_real_16i_a.h \ + volk_32fc_deinterleave_real_32f_a.h \ + volk_32fc_deinterleave_real_64f_a.h \ + volk_32fc_x2_dot_prod_32fc_a.h \ + volk_32fc_index_max_16u_a.h \ + volk_32fc_s32f_magnitude_16i_a.h \ + volk_32fc_magnitude_32f_a.h \ + volk_32fc_x2_multiply_32fc_a.h \ + 
volk_32f_s32f_convert_16i_a.h \ volk_32f_s32f_convert_16i_u.h \ - volk_32f_s32f_convert_32i_a16.h \ + volk_32f_s32f_convert_32i_a.h \ volk_32f_s32f_convert_32i_u.h \ - volk_32f_convert_64f_a16.h \ + volk_32f_convert_64f_a.h \ volk_32f_convert_64f_u.h \ - volk_32f_s32f_convert_8i_a16.h \ + volk_32f_s32f_convert_8i_a.h \ volk_32f_s32f_convert_8i_u.h \ - volk_32fc_s32f_x2_power_spectral_density_32f_a16.h \ - volk_32fc_s32f_power_spectrum_32f_a16.h \ - volk_32fc_x2_square_dist_32f_a16.h \ - volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h \ - volk_32f_x2_divide_32f_a16.h \ - volk_32f_x2_dot_prod_32f_a16.h \ + volk_32fc_s32f_x2_power_spectral_density_32f_a.h \ + volk_32fc_s32f_power_spectrum_32f_a.h \ + volk_32fc_x2_square_dist_32f_a.h \ + volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h \ + volk_32f_x2_divide_32f_a.h \ + volk_32f_x2_dot_prod_32f_a.h \ volk_32f_x2_dot_prod_32f_u.h \ - volk_32f_s32f_32f_fm_detect_32f_a16.h \ - volk_32f_index_max_16u_a16.h \ - volk_32f_x2_s32f_interleave_16ic_a16.h \ - volk_32f_x2_interleave_32fc_a16.h \ - volk_32f_x2_max_32f_a16.h \ - volk_32f_x2_min_32f_a16.h \ - volk_32f_x2_multiply_32f_a16.h \ - volk_32f_s32f_normalize_a16.h \ - volk_32f_s32f_power_32f_a16.h \ - volk_32f_sqrt_32f_a16.h \ - volk_32f_s32f_stddev_32f_a16.h \ - volk_32f_stddev_and_mean_32f_x2_a16.h \ - volk_32f_x2_subtract_32f_a16.h \ - volk_32f_x3_sum_of_poly_32f_a16.h \ - volk_32i_x2_and_32i_a16.h \ - volk_32i_s32f_convert_32f_a16.h \ + volk_32f_s32f_32f_fm_detect_32f_a.h \ + volk_32f_index_max_16u_a.h \ + volk_32f_x2_s32f_interleave_16ic_a.h \ + volk_32f_x2_interleave_32fc_a.h \ + volk_32f_x2_max_32f_a.h \ + volk_32f_x2_min_32f_a.h \ + volk_32f_x2_multiply_32f_a.h \ + volk_32f_s32f_normalize_a.h \ + volk_32f_s32f_power_32f_a.h \ + volk_32f_sqrt_32f_a.h \ + volk_32f_s32f_stddev_32f_a.h \ + volk_32f_stddev_and_mean_32f_x2_a.h \ + volk_32f_x2_subtract_32f_a.h \ + volk_32f_x3_sum_of_poly_32f_a.h \ + volk_32i_x2_and_32i_a.h \ + volk_32i_s32f_convert_32f_a.h \ 
volk_32i_s32f_convert_32f_u.h \ - volk_32i_x2_or_32i_a16.h \ - volk_32u_byteswap_a16.h \ - volk_32u_popcnt_a16.h \ - volk_64f_convert_32f_a16.h \ + volk_32i_x2_or_32i_a.h \ + volk_32u_byteswap_a.h \ + volk_32u_popcnt_a.h \ + volk_64f_convert_32f_a.h \ volk_64f_convert_32f_u.h \ - volk_64f_x2_max_64f_a16.h \ - volk_64f_x2_min_64f_a16.h \ - volk_64u_byteswap_a16.h \ - volk_64u_popcnt_a16.h \ - volk_8ic_deinterleave_16i_x2_a16.h \ - volk_8ic_s32f_deinterleave_32f_x2_a16.h \ - volk_8ic_deinterleave_real_16i_a16.h \ - volk_8ic_s32f_deinterleave_real_32f_a16.h \ - volk_8ic_deinterleave_real_8i_a16.h \ - volk_8ic_x2_multiply_conjugate_16ic_a16.h \ - volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h \ - volk_8i_convert_16i_a16.h \ + volk_64f_x2_max_64f_a.h \ + volk_64f_x2_min_64f_a.h \ + volk_64u_byteswap_a.h \ + volk_64u_popcnt_a.h \ + volk_8ic_deinterleave_16i_x2_a.h \ + volk_8ic_s32f_deinterleave_32f_x2_a.h \ + volk_8ic_deinterleave_real_16i_a.h \ + volk_8ic_s32f_deinterleave_real_32f_a.h \ + volk_8ic_deinterleave_real_8i_a.h \ + volk_8ic_x2_multiply_conjugate_16ic_a.h \ + volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h \ + volk_8i_convert_16i_a.h \ volk_8i_convert_16i_u.h \ - volk_8i_s32f_convert_32f_a16.h \ + volk_8i_s32f_convert_32f_a.h \ volk_8i_s32f_convert_32f_u.h - -VOLK_MKTABLES_SOURCES = \ - $(platform_CODE) \ - $(top_srcdir)/lib/volk_rank_archs.c \ - $(top_srcdir)/lib/volk_mktables.c - - -volk_mktables$(EXEEXT): $(VOLK_MKTABLES_SOURCES) - $(CC) -o $@ $^ $(AM_CPPFLAGS) -I$(top_builddir)/include - -volk_tables.h: volk_mktables$(EXEEXT) - ./volk_mktables$(EXEEXT) - -volk_config.h: $(top_builddir)/volk_config.h - cp $^ $(top_builddir)/include/volk/$@ - -distclean-local: - rm -f volk_config_fixed.h - rm -f volk_config.h - rm -f volk_cpu.h - rm -f volk.h - rm -f volk_registry.h - rm -f volk_runtime.h - rm -f volk_typedefs.h - rm -f volk_tables.h - rm -f *.pyc - rm -f Makefile.in - rm -f volk_environment_init.h - rm -f volk_mktables - rm -f $(BUILT_SOURCES) diff --git 
a/volk/include/volk/archs.xml b/volk/include/volk/archs.xml deleted file mode 100644 index a19a5add9..000000000 --- a/volk/include/volk/archs.xml +++ /dev/null @@ -1,139 +0,0 @@ -<!-- archs appear in order of significance for blind, de-facto version ordering --> -<grammar> - -<arch name="generic" type="all"> - <flag>none</flag> -</arch> - -<arch name="orc" type="all"> - <flag>lorc-0.4</flag> - <overrule>LV_HAVE_ORC</overrule> - <overrule_val>no</overrule_val> -</arch> - -<arch name="altivec" type="powerpc"> - <flag>maltivec</flag> -</arch> - -<arch name="32" type="x86" no_test="true" > - <flag>m32</flag> - <overrule>MD_SUBCPU</overrule> - <overrule_val>x86_64</overrule_val> -</arch> - -<arch name="64" type="x86"> - <op>0x80000001</op> - <reg>d</reg> - <shift>29</shift> - <flag>m64</flag> - <val>1</val> - <overrule>MD_SUBCPU</overrule> - <overrule_val>x86</overrule_val> -</arch> - -<arch name="3dnow" type="x86"> - <op>0x80000001</op> - <reg>d</reg> - <shift>31</shift> - <flag>m3dnow</flag> - <val>1</val> -</arch> - -<arch name="abm" type="x86"> - <val>1</val> - <op>0x80000001</op> - <reg>d</reg> - <shift>5</shift> - <flag>sse4.2</flag> -</arch> - -<arch name="popcount" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>23</shift> - <flag>mpopcnt</flag> -</arch> - -<arch name="mmx" type="x86"> - <val>1</val> - <op>1</op> - <reg>d</reg> - <shift>23</shift> - <flag>mmmx</flag> -</arch> - - -<arch name="sse" type="x86"> - <val>1</val> - <op>1</op> - <reg>d</reg> - <shift>25</shift> - <flag>msse</flag> - <environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment> - <include>xmmintrin.h</include> -</arch> - - -<arch name="sse2" type="x86"> - <val>1</val> - <op>1</op> - <reg>d</reg> - <shift>26</shift> - <flag>msse2</flag> -</arch> - -<arch name="sse3" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>0</shift> - <flag>msse3</flag> - <environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment> - 
<include>pmmintrin.h</include> -</arch> - -<arch name="ssse3" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>9</shift> - <flag>mssse3</flag> -</arch> - -<arch name="sse4_a" type="x86"> - <val>1</val> - <op>0x80000001</op> - <reg>c</reg> - <shift>6</shift> - <flag>msse4a</flag> -</arch> - - -<arch name="sse4_1" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>19</shift> - <flag>msse4.1</flag> -</arch> - -<arch name="sse4_2" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>20</shift> - <flag>msse4.2</flag> -</arch> - - -<arch name="avx" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>28</shift> - <flag>mavx</flag> -</arch> - - -</grammar> diff --git a/volk/include/volk/emit_omnilog.py b/volk/include/volk/emit_omnilog.py deleted file mode 100644 index 309d7e578..000000000 --- a/volk/include/volk/emit_omnilog.py +++ /dev/null @@ -1,13 +0,0 @@ -def emit_prolog(): - tempstring = ""; - tempstring = tempstring + '#ifdef __cplusplus\n'; - tempstring = tempstring + 'extern "C" {\n'; - tempstring = tempstring + '#endif\n'; - return tempstring; -def emit_epilog(): - tempstring = ""; - tempstring = tempstring + '#ifdef __cplusplus\n'; - tempstring = tempstring + '}\n'; - tempstring = tempstring + '#endif\n'; - return tempstring; - diff --git a/volk/include/volk/make_c.py b/volk/include/volk/make_c.py deleted file mode 100644 index 6e75067d0..000000000 --- a/volk/include/volk/make_c.py +++ /dev/null @@ -1,73 +0,0 @@ -from xml.dom import minidom -import string -from volk_regexp import * - - -def make_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n\n#include<volk/volk.h>\n'; - tempstring = tempstring + '#include<volk/volk_tables.h>\n'; - tempstring = tempstring + '#include<volk/volk_typedefs.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - 
tempstring = tempstring + '#include<string.h>\n'; - for func in funclist: - tempstring = tempstring + "#include<volk/" + func + ".h>\n" ; - tempstring = tempstring + '\n'; - - tempstring = tempstring + "static inline unsigned int volk_get_index(const char** indices, const char* arch, const int* arch_defs) {\n"; - tempstring = tempstring + " int i = 1;\n" - tempstring = tempstring + " for(;i<arch_defs[0];++i){\n" - tempstring = tempstring + " if (strcmp(arch, indices[i]) == 0) {\n" - tempstring = tempstring + " return i;\n" - tempstring = tempstring + " }\n" - tempstring = tempstring + " }\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n" - - for i in range(len(funclist)): - tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n " + funclist[i] + "_" + str(taglist[i][tags_counter]) + ",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + "static const char* " + funclist[i] + "_indices[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n \"" + str(taglist[i][tags_counter]) + "\",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", 
""); - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + retlist[i] + "inline " + funclist[i] + "_manual" + arched_arglist[i] + '\n'; - tempstring = tempstring + "return " + funclist[i] + "_archs[volk_get_index(" + funclist[i] + "_indices, arch, " + funclist[i] + "_arch_defs)](" + my_arglist[i] + ");" + "\n}\n"; - - tempstring = tempstring + retlist[i] + "inline " + funclist[i] + replace_arch.sub("", arched_arglist[i]) + '\n'; - - tempstring = tempstring + funclist[i] + "_archs[" + funclist[i] + "_func_table](" + my_arglist[i] + ");" + '\n'; - tempstring = tempstring + "}\n\n"; - - return tempstring; diff --git a/volk/include/volk/make_config_fixed.py b/volk/include/volk/make_config_fixed.py deleted file mode 100644 index 3fd1bdf0a..000000000 --- a/volk/include/volk/make_config_fixed.py +++ /dev/null @@ -1,21 +0,0 @@ -from xml.dom import minidom - -def make_config_fixed(dom) : - tempstring = ""; - tempstring = tempstring +'/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_CONFIG_FIXED_H'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_CONFIG_FIXED_H'; - tempstring = tempstring + '\n\n'; - enum_counter = 0; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + '#define LV_' + arch.swapcase() + " " + str(enum_counter) + '\n'; - enum_counter = enum_counter + 1; - tempstring = tempstring + '\n\n'; - - tempstring = tempstring + "#endif /*INCLUDED_VOLK_CONFIG_FIXED*/\n" - - return tempstring; - - - diff --git a/volk/include/volk/make_config_in.py b/volk/include/volk/make_config_in.py deleted file mode 100644 index d29680af2..000000000 --- a/volk/include/volk/make_config_in.py +++ /dev/null @@ -1,13 +0,0 @@ -from xml.dom import minidom - -def make_config_in(dom) : - tempstring = ""; - tempstring = tempstring +'/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n\n'; - - for domarch in dom: - arch = 
str(domarch.attributes["name"].value); - tempstring = tempstring + '#undef LV_HAVE_' + arch.swapcase() + '\n'; - tempstring = tempstring + '\n'; - - return tempstring diff --git a/volk/include/volk/make_cpuid_generic_c.py b/volk/include/volk/make_cpuid_generic_c.py deleted file mode 100644 index c682d4138..000000000 --- a/volk/include/volk/make_cpuid_generic_c.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. 
-# - -from xml.dom import minidom - -def make_cpuid_generic_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - for domarch in dom: - if str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; diff --git a/volk/include/volk/make_cpuid_h.py b/volk/include/volk/make_cpuid_h.py deleted file mode 100644 index cd3da2455..000000000 --- a/volk/include/volk/make_cpuid_h.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. 
from xml.dom import minidom
from emit_omnilog import *


def make_cpuid_h(dom):
    """Return the text of the generated volk_cpu.h header.

    ``dom`` is an iterable of DOM elements with a ``name`` attribute; each
    one becomes an ``int (*has_<name>) ()`` member of ``struct VOLK_CPU``.
    The declarations are wrapped between the strings returned by
    ``emit_prolog()`` and ``emit_epilog()`` and an include guard.
    """
    pieces = [
        '/*this file is auto generated by volk_register.py*/',
        '\n#ifndef INCLUDED_VOLK_CPU_H',
        '\n#define INCLUDED_VOLK_CPU_H\n\n',
        emit_prolog(),
        '\n',
        "struct VOLK_CPU {\n",
    ]

    # One function-pointer member per architecture.
    for node in dom:
        member = str(node.attributes["name"].value)
        pieces.append(" int (*has_" + member + ") ();\n")

    pieces.append("};\n\n")
    pieces.append("extern struct VOLK_CPU volk_cpu;\n\n")
    pieces.append("void volk_cpu_init ();\n")
    pieces.append("unsigned int volk_get_lvarch ();\n")
    pieces.append("\n")
    pieces.append(emit_epilog())
    pieces.append("#endif /*INCLUDED_VOLK_CPU_H*/\n")

    return "".join(pieces)
from xml.dom import minidom


def make_cpuid_powerpc_c(dom):
    """Return the C source of the PowerPC cpuid file.

    ``dom`` is an iterable of DOM elements that each carry ``name`` and
    ``type`` attributes.  No run-time probing is generated: archs whose
    type is "powerpc" or "all" are simply assumed to be present (their
    ``i_can_has_<name>`` returns 1); every other arch returns 0.  The
    text ends with ``volk_cpu_init`` and ``volk_get_lvarch``.

    Returns the generated text as a single string.
    """
    # Read the attributes once, so ``dom`` is only traversed a single time
    # and may be any iterable.
    archs = [
        (str(node.attributes["name"].value), str(node.attributes["type"].value))
        for node in dom
    ]

    out = [
        "/*this file is auto_generated by volk_register.py*/\n\n",
        "#include <volk/volk_cpu.h>\n",
        "#include <volk/volk_config_fixed.h>\n\n",
        "struct VOLK_CPU volk_cpu;\n\n",
    ]

    for name, kind in archs:
        # Just assume the machine has every powerpc (and "all") arch.
        result = "1" if kind in ("powerpc", "all") else "0"
        out.append("int i_can_has_" + name + " () {\n")
        out.append(" return " + result + ";\n")
        out.append("}\n\n")

    out.append("void volk_cpu_init() {\n")
    for name, _ in archs:
        out.append(" volk_cpu.has_" + name + " = &i_can_has_" + name + ";\n")
    out.append("}\n\n")

    out.append("unsigned int volk_get_lvarch() {\n")
    out.append(" unsigned int retval = 0;\n")
    out.append(" volk_cpu_init();\n")
    for name, _ in archs:
        out.append(" retval += volk_cpu.has_" + name + "() << LV_" + name.swapcase() + ";\n")
    out.append(" return retval;\n")
    out.append("}\n\n")

    return "".join(out)
from xml.dom import minidom


def _first_tag_text(node, tag):
    """Return the text of the first ``<tag>`` element under ``node``, or None."""
    found = node.getElementsByTagName(tag)
    if not found:
        return None
    return str(found[0].firstChild.data)


def _x86_probe_lines(node):
    """Return the C statements forming the body of one x86 probe function."""
    attrs = node.attributes
    if "no_test" in attrs.keys() and str(attrs["no_test"].value) == "true":
        # Archs flagged no_test="true" are reported present without probing.
        return [" return 1;\n"]

    op = _first_tag_text(node, "op")
    reg = _first_tag_text(node, "reg")
    shift = _first_tag_text(node, "shift")
    val = _first_tag_text(node, "val")

    if op == "1":
        return [
            " unsigned int e" + reg + "x = cpuid_e" + reg + "x (" + op + ");\n",
            " return ((e" + reg + "x >> " + shift + ") & 1) == " + val + ";\n",
        ]
    if op == "0x80000001":
        return [
            " unsigned int extended_fct_count = cpuid_eax(0x80000000);\n",
            " if (extended_fct_count < 0x80000001)\n",
            " return " + val + "^1;\n\n",
            " unsigned int extended_features = cpuid_e" + reg + "x (" + op + ");\n",
            " return ((extended_features >> " + shift + ") & 1) == " + val + ";\n",
        ]
    # No probe is known for this <op>.  A function must still be emitted,
    # because volk_cpu_init() takes its address; report the arch as absent.
    return [" return 0;\n"]


def make_cpuid_x86_c(dom):
    """Return the C source of the x86 cpuid file.

    ``dom`` is an iterable of DOM elements carrying ``name`` and ``type``
    attributes.  For x86 archs the probe is built from the ``<op>``,
    ``<reg>``, ``<shift>`` and ``<val>`` child elements (or skipped when
    the ``no_test`` attribute is "true"); "all" archs return 1 and archs of
    any other type return 0.  An x86 arch whose ``<op>`` is neither "1"
    nor "0x80000001" gets a probe that returns 0, so that every arch
    referenced by ``volk_cpu_init`` has a definition.

    Returns the generated text as a single string.
    """
    archs = list(dom)

    out = [
        "/*this file is auto_generated by volk_register.py*/\n\n",
        "#include <volk/volk_cpu.h>\n",
        "#include <volk/volk_config_fixed.h>\n\n",
        "#include <gcc_x86_cpuid.h>\n\n",
        "struct VOLK_CPU volk_cpu;\n\n",
        "#define cpuid_x86(op, r) __get_cpuid(op, r+0, r+1, r+2, r+3)\n\n",
    ]

    # One accessor per cpuid output register; regs[] is filled in the
    # order eax, ebx, ecx, edx by the cpuid_x86 macro above.
    for index, register in enumerate(("eax", "ebx", "ecx", "edx")):
        out.append("static inline unsigned int cpuid_" + register + "(unsigned int op) {\n")
        out.append(" unsigned int regs[4];\n")
        out.append(" cpuid_x86 (op, regs);\n")
        out.append(" return regs[" + str(index) + "];\n")
        out.append("}\n\n")

    for node in archs:
        name = str(node.attributes["name"].value)
        kind = str(node.attributes["type"].value)
        out.append("int i_can_has_" + name + " () {\n")
        if kind == "x86":
            out.extend(_x86_probe_lines(node))
        elif kind == "all":
            out.append(" return 1;\n")
        else:
            out.append(" return 0;\n")
        out.append("}\n\n")

    names = [str(node.attributes["name"].value) for node in archs]

    out.append("void volk_cpu_init() {\n")
    for name in names:
        out.append(" volk_cpu.has_" + name + " = &i_can_has_" + name + ";\n")
    out.append("}\n\n")

    out.append("unsigned int volk_get_lvarch() {\n")
    out.append(" unsigned int retval = 0;\n")
    out.append(" volk_cpu_init();\n")
    for name in names:
        out.append(" retval += volk_cpu.has_" + name + "() << LV_" + name.swapcase() + ";\n")
    out.append(" return retval;\n")
    out.append("}\n\n")

    return "".join(out)


def make_environment_init_c(dom):
    """Return the C source that defines ``volk_environment_init``.

    ``dom`` is an iterable of DOM elements with a ``name`` attribute.
    Every ``<include>`` element of an arch becomes an ``#include`` and
    every ``<environment>`` element becomes a statement inside
    ``volk_environment_init``; both are guarded by
    ``#if LV_HAVE_<NAME>``.

    Returns the generated text as a single string.
    """
    archs = list(dom)

    out = [
        "/*this file is auto_generated by volk_register.py*/\n\n",
        "#include<volk/volk_environment_init.h>\n",
        "#include<volk/volk_config.h>\n",
    ]

    for node in archs:
        guard = "#if LV_HAVE_" + str(node.attributes["name"].value).swapcase() + "\n"
        for inc in node.getElementsByTagName("include"):
            out.append(guard)
            out.append("#include<" + str(inc.firstChild.data) + ">\n")
            out.append("#endif\n")

    out.append('\n\n')
    out.append("void volk_environment_init(){\n")

    for node in archs:
        guard = "#if LV_HAVE_" + str(node.attributes["name"].value).swapcase() + "\n"
        for env in node.getElementsByTagName("environment"):
            out.append(guard)
            out.append(" " + str(env.firstChild.data) + "\n")
            out.append("#endif\n")

    out.append("}\n")
    return "".join(out)
from xml.dom import minidom


def make_environment_init_h():
    """Return the text of the generated volk_environment_init.h header.

    The single ``volk_environment_init`` prototype is wrapped between the
    strings returned by ``emit_prolog()`` and ``emit_epilog()`` and an
    include guard.
    """
    # Imported here so that the generators in this file that need only the
    # standard library stay importable without the project helper module.
    from emit_omnilog import emit_prolog, emit_epilog

    return "".join([
        "/*this file is auto_generated by volk_register.py*/\n\n",
        "#ifndef INCLUDE_LIBVECTOR_ENVIRONMENT_INIT_H\n",
        "#define INCLUDE_LIBVECTOR_ENVIRONMENT_INIT_H\n",
        "\n",
        emit_prolog(),
        "void volk_environment_init();\n",
        emit_epilog(),
        "#endif\n",
    ])


def make_h(funclist, arched_arglist, retlist):
    """Return the text of the generated volk.h header.

    The three lists are parallel: for index ``i`` the header declares
    ``retlist[i] + funclist[i]`` twice.  The first declaration uses
    ``arched_arglist[i]`` after ``replace_arch`` matches are removed; the
    second is the ``_manual`` variant and keeps them.  In both,
    ``replace_bracket`` matches are replaced by ";".
    """
    from emit_omnilog import emit_prolog, emit_epilog
    # NOTE(review): the original relied on star imports from emit_omnilog
    # and volk_regexp; the regex objects are assumed to live in
    # volk_regexp -- confirm.
    from volk_regexp import replace_arch, replace_bracket

    out = [
        '/*this file is auto generated by volk_register.py*/',
        '\n#ifndef INCLUDED_VOLK_H',
        '\n#define INCLUDED_VOLK_H',
        '\n\n#include<inttypes.h>\n',
        '#include<volk/volk_complex.h>\n',
        '#include<volk/volk_config.h>\n',
        '#include<volk/volk_config_fixed.h>\n',
        '#include<volk/volk_environment_init.h>\n',
        emit_prolog(),
        '\n',
    ]

    for i, ret in enumerate(retlist):
        func = funclist[i]
        args = arched_arglist[i]
        out.append(ret + func + replace_bracket.sub(";", replace_arch.sub("", args)) + '\n')
        out.append(ret + func + "_manual" + replace_bracket.sub(";", args) + '\n')

    out.append(emit_epilog())
    out.append("#endif /*INCLUDED_VOLK_H*/\n")
    return "".join(out)


def make_init_c(funclist, dom):
    """Return the C source that defines ``volk_runtime_init``.

    ``funclist`` holds the kernel names; each ``volk_runtime.<name>`` slot
    is initialised to ``default_acquire_<name>``.  ``dom`` is an iterable
    of DOM elements with a ``name`` attribute: their ``<include>``
    elements become guarded ``#include`` lines and their ``<environment>``
    elements become statements that run when ``volk_cpu.has_<name>()`` is
    true.

    Returns the generated text as a single string.
    """
    archs = list(dom)

    out = [
        '/*this file is auto generated by volk_register.py*/',
        '\n\n#include<volk/volk_runtime.h>\n',
        '#include<volk/volk_cpu.h>\n',
        '#include<volk_init.h>\n',
    ]

    for node in archs:
        guard = "#if LV_HAVE_" + str(node.attributes["name"].value).swapcase() + "\n"
        for inc in node.getElementsByTagName("include"):
            out.append(guard)
            out.append("#include<" + str(inc.firstChild.data) + ">\n")
            out.append("#endif\n")

    out.append('\n\n')
    out.append("extern struct VOLK_RUNTIME volk_runtime;\n\n")
    out.append("struct VOLK_RUNTIME* get_volk_runtime(){\n")
    out.append(" return &volk_runtime;\n")
    out.append("}\n\n")
    out.append(" void volk_runtime_init() {\nvolk_cpu_init();\n")

    for func in funclist:
        out.append(" volk_runtime." + func + " = default_acquire_" + func + ";\n")

    for node in archs:
        name = str(node.attributes["name"].value)
        for env in node.getElementsByTagName("environment"):
            out.append(" if(volk_cpu.has_" + name + "()){\n")
            out.append("#if LV_HAVE_" + name.swapcase() + "\n")
            out.append(" " + str(env.firstChild.data) + "\n")
            out.append("#endif\n")
            out.append(" }\n")

    out.append("}\n")
    return "".join(out)


def make_init_h(funclist, arched_arglist, retlist):
    """Return the text of the generated volk_init.h header.

    The three lists are parallel: for index ``i`` the header declares
    ``retlist[i] + " default_acquire_" + funclist[i]`` followed by
    ``arched_arglist[i]`` with ``replace_arch`` matches removed and
    ``replace_bracket`` matches replaced by ";".
    """
    from emit_omnilog import emit_prolog, emit_epilog
    # NOTE(review): the original relied on star imports from emit_omnilog
    # and volk_regexp; the regex objects are assumed to live in
    # volk_regexp -- confirm.
    from volk_regexp import replace_arch, replace_bracket

    out = [
        '/*this file is auto generated by volk_register.py*/',
        '\n#ifndef INCLUDED_VOLK_INIT_H',
        '\n#define INCLUDED_VOLK_INIT_H',
        '\n\n#include<inttypes.h>\n',
        '#include<volk/volk_complex.h>\n',
        '\n',
        emit_prolog(),
    ]

    for i, ret in enumerate(retlist):
        args = replace_bracket.sub(";", replace_arch.sub("", arched_arglist[i]))
        out.append(ret + " default_acquire_" + funclist[i] + args + '\n')

    out.append(emit_epilog())
    out.append("#endif /*INCLUDED_VOLK_INIT_H*/\n")
    return "".join(out)


def make_mktables(funclist):
    """Return the C source of the program that writes volk_tables.h.

    For every name in ``funclist`` the generated ``main`` prints one
    ``<name>_func_table`` definition whose value is
    ``volk_rank_archs(<name>_arch_defs, volk_get_lvarch())``.

    Returns the generated text as a single string.
    """
    out = [
        '/*this file is auto generated by volk_register.py*/\n',
        '#include<stdio.h>\n',
        '#include<volk/volk_registry.h>\n',
        '#include<volk_rank_archs.h>\n',
        # The original assigned this line to a misspelled variable
        # ("tempstrgin"), so the include never reached the output.
        '#include<volk/volk_cpu.h>\n',
        "\n\n",
        'int main() {\n',
        ' int i = 0;\n',
        ' FILE* output;\n',
        ' output = fopen("volk_tables.h", "w");\n',
        ' fprintf(output, "#ifndef INCLUDED_VOLK_TABLES_H\\n");\n',
        ' fprintf(output, "#define INCLUDED_VOLK_TABLES_H\\n\\n");\n',
    ]

    for func in funclist:
        out.append(
            ' fprintf(output, "static const ' + func + '_func_table = %u;\\n", '
            'volk_rank_archs(' + func + '_arch_defs, volk_get_lvarch()));\n'
        )

    out.append(' fprintf(output, "#endif /*INCLUDED_VOLK_TABLES_H*/\\n");\n')
    out.append(' fclose(output);\n')
    out.append('}\n')
    return "".join(out)
from xml.dom import minidom
import string


def make_proccpu_sim(dom):
    """Return the C source of the program that prints the supported archs.

    ``dom`` is an iterable of DOM elements with a ``name`` attribute.  The
    generated ``main`` calls ``volk_cpu.has_<name>()`` for every arch and
    prints the names of those that returned 1, each preceded by a space.

    Returns the generated text as a single string.
    """
    out = [
        "/*this file is auto_generated by volk_register.py*/\n\n",
        "#include <volk/volk_cpu.h>\n",
        "#include <stdio.h>\n",
        "#include <string.h>\n",
        "\n\n",
        "void test_append(char* buf, int val, char* newkey){\n",
        " if(val==1){\n",
        # The original generated sprintf(buf, "%s %s", buf, newkey), which
        # passes buf as both destination and source; copying between
        # overlapping objects is undefined for sprintf.
        " strcat(buf, \" \");\n",
        " strcat(buf, newkey);\n",
        " }\n",
        "}\n",
        "\n\n",
        "int main() {\n",
        " volk_cpu_init();\n",
        # Start from an empty string: the buffer is appended to and then
        # printed, and was previously left uninitialised.
        " char buf[2048] = \"\";\n",
    ]

    for node in dom:
        name = str(node.attributes["name"].value)
        out.append(" test_append(buf, volk_cpu.has_" + name + "(), \"" + name + "\");\n")

    out.append(" printf(\"%s\\n\", buf);\n")
    out.append("}\n")
    return "".join(out)


def _arch_defs_table(func, fcount):
    """Return the ``<func>_arch_defs`` array definition for one function.

    ``fcount`` is a sequence of arch-name lists.  The first array element
    is the sum of the ``LV_<ARCH>_CNT`` products (one product per list);
    it is followed by one ``#if``-guarded bit mask per list.  The last
    element carries no trailing comma.
    """
    count_expr = " + ".join(
        " (LV_" + " * LV_".join(arch + "_CNT" for arch in arch_list) + ")"
        for arch_list in fcount
    )
    masks = [
        "#if LV_HAVE_" + " && LV_HAVE_".join(arch_list) + "\n"
        + " (1 << LV_" + ") + (1 << LV_".join(arch_list) + ")"
        for arch_list in fcount
    ]

    text = "static const int " + func + "_arch_defs[] = {\n"
    if masks:
        text += count_expr + ",\n"
        text += ",\n#endif\n".join(masks) + "\n#endif\n"
    else:
        # Nothing follows the count, so it is the last element.
        text += count_expr + "\n"
    return text + "};\n\n"


def make_registry(dom, funclist, fcountlist):
    """Return the text of the generated volk_registry.h header.

    ``dom`` is an iterable of DOM elements with a ``name`` attribute; each
    gets an ``LV_<NAME>_CNT`` macro that is 1 when ``LV_HAVE_<NAME>`` is
    set and 0 otherwise.  ``fcountlist[i]`` holds the arch-name lists of
    ``funclist[i]`` and produces that function's ``_arch_defs`` array.

    The trailing separators are no longer trimmed by searching the whole
    output with ``rfind`` (which could strip text belonging to an earlier
    function when a function had no arch lists) and the Python 2-only
    ``string.replace`` is no longer used.
    """
    # Imported here so that the generators in this file that need only the
    # standard library stay importable without the project helper module.
    from emit_omnilog import emit_prolog, emit_epilog

    out = [
        "/*this file is auto_generated by volk_register.py*/\n\n",
        '\n#ifndef INCLUDED_VOLK_REGISTRY_H',
        '\n#define INCLUDED_VOLK_REGISTRY_H\n\n',
        '#include<volk/volk_config.h>\n',
        '#include<volk/volk_config_fixed.h>\n',
        emit_prolog(),
        '\n',
    ]

    for node in dom:
        macro = str(node.attributes["name"].value).swapcase()
        out.append("#if LV_HAVE_" + macro + "\n")
        out.append("#define LV_" + macro + "_CNT 1\n")
        out.append("#else\n")
        out.append("#define LV_" + macro + "_CNT 0\n")
        out.append("#endif /*LV_HAVE_" + macro + "*/\n\n")

    for index, fcount in enumerate(fcountlist):
        out.append(_arch_defs_table(funclist[index], fcount))

    out.append(emit_epilog())
    out.append("#endif /*INCLUDED_VOLK_REGISTRY_H*/\n")
    return "".join(out)
from xml.dom import minidom
import string
from emit_omnilog import *
from volk_regexp import *


def make_runtime(funclist):
    """Return the text of the generated volk_runtime.h header.

    ``struct VOLK_RUNTIME`` gets one member per name in ``funclist``; the
    member's type is the name with ``replace_volk`` matches replaced by
    "p".  The declarations are wrapped between the strings returned by
    ``emit_prolog()`` and ``emit_epilog()`` and an include guard.
    """
    out = [
        '/*this file is auto generated by volk_register.py*/\n',
        '\n#ifndef INCLUDED_VOLK_RUNTIME',
        '\n#define INCLUDED_VOLK_RUNTIME',
        '\n\n#include<volk/volk_typedefs.h>\n',
        '#include<volk/volk_config.h>\n',
        '#include<volk/volk_config_fixed.h>\n',
        '#include<volk/volk_complex.h>\n',
        emit_prolog(),
        '\n',
        "struct VOLK_RUNTIME {\n",
    ]

    for func in funclist:
        out.append(replace_volk.sub("p", func) + " " + func + ";\n")

    out.append("};\n\n")
    out.append("struct VOLK_RUNTIME* get_volk_runtime();\n\n")
    out.append("\nvoid volk_runtime_init();\n")
    out.append(emit_epilog())
    out.append("#endif /*INCLUDED_VOLK_RUNTIME*/\n")
    return "".join(out)


def make_runtime_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist):
    """Return the C source that defines the runtime dispatch tables.

    All list arguments are parallel to ``funclist``.  For function ``i``
    the output contains:

    * a ``<func>_archs[]`` array with one ``#if``-guarded entry
      ``<func>_<tag>`` per arch-name list in ``fcountlist[i]`` (tags come
      from ``taglist[i]``, in the same order); the last entry carries no
      trailing comma;
    * a ``default_acquire_<func>`` definition whose header is built from
      ``retlist[i]`` and ``arched_arglist[i]`` (with ``replace_arch``
      matches removed), and which stores the selected implementation in
      ``volk_runtime.<func>`` and calls it with ``my_arglist[i]``.

    The trailing comma is no longer trimmed by searching the whole output
    with ``rfind`` (which could strip commas from an earlier function when
    a function had no arch lists) and the Python 2-only ``string.replace``
    is no longer used.
    """
    out = [
        '/*this file is auto generated by volk_register.py*/',
        '\n\n#include<volk/volk_runtime.h>\n',
        '#include<volk/volk_config.h>\n',
        "#include<volk/volk_config_fixed.h>\n",
        '#include<volk/volk_cpu.h>\n',
        '#include<volk_init.h>\n',
        '#include<volk/volk_registry.h>\n',
    ]

    for func in funclist:
        out.append("#include<volk/" + func + ".h>\n")
    out.append('\n')

    out.append("struct VOLK_RUNTIME volk_runtime;\n")

    for i, func in enumerate(funclist):
        out.append("static const " + replace_volk.sub("p", func) + " " + func + "_archs[] = {\n")

        entries = [
            "#if LV_HAVE_" + " && LV_HAVE_".join(arch_list) + "\n "
            + func + "_" + str(taglist[i][position])
            for position, arch_list in enumerate(fcountlist[i])
        ]
        if entries:
            # Every entry but the last is followed by a comma.
            out.append(",\n#endif\n".join(entries) + "\n#endif\n")
        out.append("};\n\n")

        # Expression that picks the best implementation for this machine.
        selected = func + "_archs[volk_rank_archs(" + func + "_arch_defs, volk_get_lvarch())]"

        out.append(retlist[i] + "default_acquire_" + func + replace_arch.sub("", arched_arglist[i]) + '\n')
        out.append("volk_runtime." + func + " = " + selected + ";\n")
        out.append("return " + selected + "(" + my_arglist[i] + ");" + '\n}\n')

    return "".join(out)
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# - -from xml.dom import minidom - -def make_set_simd(dom) : - tempstring = ""; - tempstring = tempstring +'dnl this file is auto generated by volk_register.py\n\n'; - - tempstring = tempstring + "AC_DEFUN([_MAKE_FAKE_PROCCPU],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; - tempstring = tempstring + " AC_MSG_CHECKING([proccpu])\n"; - tempstring = tempstring + " case \"$MD_CPU\" in\n"; - tempstring = tempstring + " (x86)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ -I$srcdir/lib $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_x86.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " (powerpc)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_powerpc.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - 
tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " (*)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_generic.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " esac\n"; - tempstring = tempstring + "])\n" - - for domarch in dom: - if str(domarch.attributes["type"].value) != "all": - arch = str(domarch.attributes["name"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - tempstring = tempstring + "AC_DEFUN([_TRY_ADD_" + arch.swapcase() + "],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " LF_CHECK_CC_FLAG([-" + flag + "])\n"; - tempstring = tempstring + " LF_CHECK_CXX_FLAG([-" + flag + "])\n"; - tempstring = tempstring + "])\n"; - - tempstring = tempstring + "AC_DEFUN([LV_SET_SIMD_FLAGS],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; - tempstring = tempstring + " AC_SUBST(LV_CXXFLAGS)\n"; - tempstring = tempstring + " indCC=no\n"; - tempstring = tempstring + " indCXX=no\n"; - tempstring = tempstring + " indLV_ARCH=no\n"; - tempstring = tempstring + " AC_ARG_WITH(lv_arch,\n"; - tempstring = tempstring + " AC_HELP_STRING([--with-lv_arch=ARCH],[set volk hardware speedups as space separated string with elements from the following list("; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + arch + ", " - tempstring = tempstring[0:len(tempstring) - 2]; - - tempstring = 
tempstring + ")]),\n"; - tempstring = tempstring + " [cf_with_lv_arch=\"$withval\"],\n"; - tempstring = tempstring + " [cf_with_lv_arch=\"\"])\n"; - if str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; - tempstring = tempstring + " ADDONS=\"\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"\"\n"; - tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; - tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; - tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; - tempstring = tempstring + " cf_with_lv_arch=$lv_PROCCPU\n"; - tempstring = tempstring + " OVERRULE_FLAG=\"yes\"\n"; - - tempstring = tempstring + " fi\n"; - for domarch in dom: - if str(domarch.attributes["type"].value) != "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=no\n"; - - tempstring = tempstring + " case \"$MD_CPU\" in\n"; - tempstring = tempstring + " (x86)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - if atype == "x86": - tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - overrule = domarch.getElementsByTagName("overrule"); - if overrule: - overrule = str(overrule[0].firstChild.data); - else: - overrule = ""; - overrule_val = domarch.getElementsByTagName("overrule_val"); - if overrule_val: - overrule_val = str(overrule_val[0].firstChild.data); - else: - overrule_val = ""; - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "x86": - tempstring = tempstring + " for i in $lf_CXXFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = 
tempstring + " indCXX=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $lf_CFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCC=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - - tempstring = tempstring + " if test \"$indCC\" == \"yes\" && test \"$indCXX\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indCC=no\n" - tempstring = tempstring + " indCXX=no\n" - tempstring = tempstring + " indLV_ARCH=no\n" - elif atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + 
overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - - tempstring = tempstring + " ;;\n" - - tempstring = tempstring + " (powerpc)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - if atype == "powerpc": - tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "powerpc": - tempstring = tempstring + " for i in $lf_CXXFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCXX=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $lf_CFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCC=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " 
if test \"$indCC\" = yes && test \"indCXX\" = yes && \"indLV_ARCH\" = yes; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indCC=no\n" - tempstring = tempstring + " indCXX=no\n" - tempstring = tempstring + " indLV_ARCH=no\n" - elif atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " ;;\n" - tempstring = tempstring + " (*)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring 
+ " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " ;;\n" - tempstring = tempstring + " esac\n" - tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" - tempstring = tempstring + "])\n" - - return tempstring; - - diff --git a/volk/include/volk/make_typedefs.py b/volk/include/volk/make_typedefs.py deleted file mode 100644 index fe81cb2b0..000000000 --- a/volk/include/volk/make_typedefs.py +++ /dev/null @@ -1,23 +0,0 @@ -from xml.dom import minidom -import string -from volk_regexp import * - - - -def make_typedefs(funclist, retlist, my_argtypelist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_TYPEDEFS'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_TYPEDEFS\n'; - tempstring = tempstring + '\n\n#include<inttypes.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - - tempstring = tempstring + '\n'; - - for i in range(len(funclist)): - tempstring = tempstring + "typedef " + retlist[i] +" (*" + 
replace_volk.sub("p", funclist[i]) + ")(" + my_argtypelist[i] + ");\n\n"; - - tempstring = tempstring + "#endif /*INCLUDED_VOLK_TYPEDEFS*/\n"; - - return tempstring; diff --git a/volk/include/volk/volk_16i_branch_4_state_8_a16.h b/volk/include/volk/volk_16i_branch_4_state_8_a.h index 3437c1a6b..0424e66e9 100644 --- a/volk/include/volk/volk_16i_branch_4_state_8_a16.h +++ b/volk/include/volk/volk_16i_branch_4_state_8_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_branch_4_state_8_a16_H -#define INCLUDED_volk_16i_branch_4_state_8_a16_H +#ifndef INCLUDED_volk_16i_branch_4_state_8_a_H +#define INCLUDED_volk_16i_branch_4_state_8_a_H #include<inttypes.h> @@ -8,13 +8,13 @@ -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +static inline void volk_16i_branch_4_state_8_a_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11; @@ -137,8 +137,8 @@ static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* s #endif /*LV_HAVE_SSEs*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_branch_4_state_8_a_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { int i = 0; int bound = 4; @@ -191,4 +191,4 @@ static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_branch_4_state_8_a16_H*/ +#endif /*INCLUDED_volk_16i_branch_4_state_8_a_H*/ diff --git a/volk/include/volk/volk_16i_convert_8i_a16.h b/volk/include/volk/volk_16i_convert_8i_a.h index 
73e45ad63..8046035c7 100644 --- a/volk/include/volk/volk_16i_convert_8i_a16.h +++ b/volk/include/volk/volk_16i_convert_8i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16i_convert_8i_a16_H -#define INCLUDED_volk_16i_convert_8i_a16_H +#ifndef INCLUDED_volk_16i_convert_8i_a_H +#define INCLUDED_volk_16i_convert_8i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the input 16 bit integer data into 8 bit integer data @@ -12,7 +12,7 @@ \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -52,7 +52,7 @@ static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int1 \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -66,4 +66,4 @@ static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const i -#endif /* INCLUDED_volk_16i_convert_8i_a16_H */ +#endif /* INCLUDED_volk_16i_convert_8i_a_H */ diff --git a/volk/include/volk/volk_16i_convert_8i_u.h b/volk/include/volk/volk_16i_convert_8i_u.h index 5fc792b56..df1084fe0 100644 --- a/volk/include/volk/volk_16i_convert_8i_u.h +++ b/volk/include/volk/volk_16i_convert_8i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include 
<stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the input 16 bit integer data into 8 bit integer data diff --git a/volk/include/volk/volk_16i_max_star_16i_a16.h b/volk/include/volk/volk_16i_max_star_16i_a.h index ff57bd2a1..6a4f63708 100644 --- a/volk/include/volk/volk_16i_max_star_16i_a16.h +++ b/volk/include/volk/volk_16i_max_star_16i_a.h @@ -1,18 +1,18 @@ -#ifndef INCLUDED_volk_16i_max_star_16i_a16_H -#define INCLUDED_volk_16i_max_star_16i_a16_H +#ifndef INCLUDED_volk_16i_max_star_16i_a_H +#define INCLUDED_volk_16i_max_star_16i_a_H #include<inttypes.h> #include<stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes) { @@ -85,9 +85,9 @@ static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, #endif /*LV_HAVE_SSSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes) { int i = 0; @@ -105,4 +105,4 @@ static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_max_star_16i_a16_H*/ +#endif /*INCLUDED_volk_16i_max_star_16i_a_H*/ diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h index 695e08dbf..f60b33a41 100644 --- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h +++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a.h @@ -1,18 +1,18 @@ -#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a16_H -#define INCLUDED_volk_16i_max_star_horizontal_16i_a16_H +#ifndef 
INCLUDED_volk_16i_max_star_horizontal_16i_a_H +#define INCLUDED_volk_16i_max_star_horizontal_16i_a_H #include<inttypes.h> #include<stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d}; @@ -109,8 +109,8 @@ static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, #endif /*LV_HAVE_SSSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_max_star_horizontal_16i_a_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { int i = 0; @@ -127,4 +127,4 @@ static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a16_H*/ +#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a_H*/ diff --git a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h index e52a949fb..de36cee80 100644 --- a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h +++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a16_H -#define INCLUDED_volk_16i_permute_and_scalar_add_a16_H +#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a_H +#define INCLUDED_volk_16i_permute_and_scalar_add_a_H #include<inttypes.h> @@ -8,12 +8,12 @@ -#if 
LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +static inline void volk_16i_permute_and_scalar_add_a_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -116,8 +116,8 @@ static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, sho #endif /*LV_HAVE_SSEs*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_permute_and_scalar_add_a_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { int i = 0; @@ -136,4 +136,4 @@ static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, sh #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a16_H*/ +#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a_H*/ diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a.h index 83fd26ff9..0555fdf00 100644 --- a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16i_s32f_convert_32f_a16_H -#define INCLUDED_volk_16i_s32f_convert_32f_a16_H +#ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H +#define INCLUDED_volk_16i_s32f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
@@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -58,7 +58,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, con } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! @@ -68,7 +68,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, con \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -94,7 +94,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 16 bit input data buffer @@ -102,7 +102,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -116,4 +116,4 @@ static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, co -#endif /* INCLUDED_volk_16i_s32f_convert_32f_a16_H */ +#endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */ diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_u.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h index 8f0dd0083..d34acc091 100644 --- a/volk/include/volk/volk_16i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -59,7 +59,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! @@ -96,7 +96,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const in } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 16 bit input data buffer diff --git a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h index e4ec5ab4e..2688aff04 100644 --- a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h +++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H -#define INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H +#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a_H +#define INCLUDED_volk_16i_x4_quad_max_star_16i_a_H #include<inttypes.h> @@ -9,11 +9,11 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<emmintrin.h> -static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +static inline void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { @@ -96,9 +96,9 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* /*asm volatile ( - "volk_16i_x4_quad_max_star_16i_a16_sse2_L1:\n\t" + "volk_16i_x4_quad_max_star_16i_a_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je volk_16i_x4_quad_max_star_16i_a16_sse2_END\n\t" + "je volk_16i_x4_quad_max_star_16i_a_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" @@ -143,9 +143,9 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* "movaps %%xmm1, (%[target])\n\t" "addw $16, %[target]\n\t" - "jmp volk_16i_x4_quad_max_star_16i_a16_sse2_L1\n\t" + "jmp volk_16i_x4_quad_max_star_16i_a_sse2_L1\n\t" - "volk_16i_x4_quad_max_star_16i_a16_sse2_END:\n\t" + "volk_16i_x4_quad_max_star_16i_a_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target) : @@ -167,8 +167,8 
@@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* #endif /*LV_HAVE_SSE2*/ -#if LV_HAVE_GENERIC -static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_16i_x4_quad_max_star_16i_a_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { int i = 0; @@ -188,4 +188,4 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, shor #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H*/ +#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a_H*/ diff --git a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h index 5744ca3a6..e4c9f17ed 100644 --- a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h +++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H -#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H +#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H +#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H #include<inttypes.h> @@ -9,11 +9,11 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4; __m128i *p_target0, *p_target1, *p_target2, *p_target3, *p_src0, *p_src1, *p_src2, *p_src3, *p_src4; @@ -65,9 +65,9 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* } /*asm volatile ( - 
".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1:\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END\n\t" + "je .%=volk_16i_x5_add_quad_16i_x4_a_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" "movaps (%[src2]), %%xmm3\n\t" @@ -91,8 +91,8 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* "add $16, %[target1]\n\t" "add $16, %[target2]\n\t" "add $16, %[target3]\n\t" - "jmp .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1\n\t" - ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END:\n\t" + "jmp .%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3) :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5" @@ -111,9 +111,9 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* #endif /*LV_HAVE_SSE2*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { int i = 0; @@ -133,4 +133,4 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short -#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H*/ +#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H*/ diff --git a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h index 7e08bf182..cdd60235e 100644 --- a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h +++ 
b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H -#define INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H +#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H +#define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -52,7 +52,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into I & Q vector data @@ -61,7 +61,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -120,7 +120,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int1 } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data \param complexVector The complex input vector @@ -128,7 +128,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int1 \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -140,7 +140,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, i } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Deinterleaves the complex 16 bit vector into I & Q vector data \param complexVector The complex input vector @@ -148,11 +148,11 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, i \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -extern void volk_16ic_deinterleave_16i_x2_a16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); -static inline void volk_16ic_deinterleave_16i_x2_a16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ - volk_16ic_deinterleave_16i_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, num_points); +extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_16i_x2_a_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a_H */ diff --git a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h index 388c00592..2b99e068e 100644 --- a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a16_H -#define INCLUDED_volk_16ic_deinterleave_real_16i_a16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H +#define INCLUDED_volk_16ic_deinterleave_real_16i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -47,7 +47,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, c #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I vector data @@ -55,7 +55,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, c \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -96,14 +96,14 @@ static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, co } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 16 bit vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -117,4 +117,4 @@ static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, -#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a_H */ diff --git a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a.h index 55a25702e..cd2fabb52 100644 --- a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a16_H -#define INCLUDED_volk_16ic_deinterleave_real_8i_a16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H +#define INCLUDED_volk_16ic_deinterleave_real_8i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into 8 bit I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -59,14 +59,14 @@ static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, con } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; int16_t* complexVectorPtr = (int16_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -77,18 +77,18 @@ static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Deinterleaves the complex 16 bit vector into 8 bit I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -extern void volk_16ic_deinterleave_real_8i_a16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); -static inline void volk_16ic_deinterleave_real_8i_a16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ - volk_16ic_deinterleave_real_8i_a16_orc_impl(iBuffer, complexVector, num_points); +extern void volk_16ic_deinterleave_real_8i_a_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_real_8i_a_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a_H */ diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a16.h b/volk/include/volk/volk_16ic_magnitude_16i_a.h index bdcace750..a6951e967 100644 --- a/volk/include/volk/volk_16ic_magnitude_16i_a16.h +++ b/volk/include/volk/volk_16ic_magnitude_16i_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H -#define INCLUDED_volk_16ic_magnitude_16i_a16_H +#ifndef INCLUDED_volk_16ic_magnitude_16i_a_H +#define INCLUDED_volk_16ic_magnitude_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -13,7 +14,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,8 +26,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -76,7 +77,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -84,7 +85,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -96,8 +97,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float inputFloatBuffer[4] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -153,14 +154,14 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -173,18 +174,18 @@ static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC_DISABLED +#ifdef LV_HAVE_ORC_DISABLED /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); -static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ - volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); +extern void volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); +static inline void volk_16ic_magnitude_16i_a_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ + 
volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */ +#endif /* INCLUDED_volk_16ic_magnitude_16i_a_H */ diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h index 606de2fc5..e73d405e0 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H -#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H +#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data @@ -14,7 +15,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl __m128 invScalar = _mm_set_ps1(1.0/scalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ @@ -68,7 +69,7 @@ static inline void 
volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data \param complexVector The complex input vector @@ -77,7 +78,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -89,7 +90,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data \param complexVector The complex input vector @@ -98,11 +99,11 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -extern void volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); -static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ - volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); +extern void volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_deinterleave_32f_x2_a_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H */ diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h index 62331e496..1630cb0ed 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H -#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H +#define 
INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I float vector data @@ -13,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -52,7 +53,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffe } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I float vector data @@ -61,7 +62,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffe \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -72,7 +73,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, __m128 invScalar = _mm_set_ps1(iScalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < 
quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; @@ -99,7 +100,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into I float vector data \param complexVector The complex input vector @@ -107,7 +108,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; @@ -122,4 +123,4 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuff -#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */ diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h index ae64efbeb..35406e2cb 100644 --- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H -#define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H +#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H +#define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -14,7 +15,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ @@ -70,7 +71,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -79,7 +80,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -91,7 +92,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result, re, im; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ inputFloatBuffer[0] = (float)(complexVectorPtr[0]); @@ -140,7 +141,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -148,7 +149,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -161,7 +162,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVect } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC_DISABLED +#ifdef LV_HAVE_ORC_DISABLED /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -169,11 +170,11 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVect \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_16ic_s32f_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); -static inline void volk_16ic_s32f_magnitude_32f_a16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ - volk_16ic_s32f_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_magnitude_32f_a_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H */ +#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */ diff --git a/volk/include/volk/volk_16u_byteswap_a16.h b/volk/include/volk/volk_16u_byteswap_a.h index c8128dbab..75c7ef0f3 100644 --- a/volk/include/volk/volk_16u_byteswap_a16.h +++ b/volk/include/volk/volk_16u_byteswap_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_16u_byteswap_a16_H -#define INCLUDED_volk_16u_byteswap_a16_H +#ifndef INCLUDED_volk_16u_byteswap_a_H +#define INCLUDED_volk_16u_byteswap_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
@@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint16_t* inputPtr = intsToSwap; __m128i input, left, right, output; @@ -43,13 +43,13 @@ static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int16_t's. \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points){ unsigned int point; uint16_t* inputPtr = intsToSwap; for(point = 0; point < num_points; point++){ @@ -61,17 +61,17 @@ static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Byteswaps (in-place) an aligned vector of int16_t's. 
\param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -extern void volk_16u_byteswap_a16_orc_impl(uint16_t* intsToSwap, unsigned int num_points); -static inline void volk_16u_byteswap_a16_orc(uint16_t* intsToSwap, unsigned int num_points){ - volk_16u_byteswap_a16_orc_impl(intsToSwap, num_points); +extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points); +static inline void volk_16u_byteswap_a_orc(uint16_t* intsToSwap, unsigned int num_points){ + volk_16u_byteswap_a_orc_impl(intsToSwap, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_16u_byteswap_a16_H */ +#endif /* INCLUDED_volk_16u_byteswap_a_H */ diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a16.h b/volk/include/volk/volk_32f_accumulator_s32f_a.h index 4a3588e6d..7ce0d1c80 100644 --- a/volk/include/volk/volk_32f_accumulator_s32f_a16.h +++ b/volk/include/volk/volk_32f_accumulator_s32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H -#define INCLUDED_volk_32f_accumulator_s32f_a16_H +#ifndef INCLUDED_volk_32f_accumulator_s32f_a_H +#define INCLUDED_volk_32f_accumulator_s32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Accumulates the values in the input buffer @@ -12,13 +13,13 @@ \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float tempBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float tempBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); @@ -42,14 +43,14 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Accumulates the values in the input buffer \param result The accumulated result \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a_generic(float* result, const float* inputBuffer, unsigned int num_points){ const float* aPtr = inputBuffer; unsigned int number = 0; float returnValue = 0; @@ -64,4 +65,4 @@ static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const fl -#endif /* INCLUDED_volk_32f_accumulator_s32f_a16_H */ +#endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_a16.h b/volk/include/volk/volk_32f_convert_64f_a.h index c303dc118..dda646409 100644 --- a/volk/include/volk/volk_32f_convert_64f_a16.h +++ b/volk/include/volk/volk_32f_convert_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_convert_64f_a16_H -#define 
INCLUDED_volk_32f_convert_64f_a16_H +#ifndef INCLUDED_volk_32f_convert_64f_a_H +#define INCLUDED_volk_32f_convert_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the float values into double values @@ -12,7 +12,7 @@ \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const flo \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a_generic(double* outputVector, const float* inputVector, unsigned int num_points){ double* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const -#endif /* INCLUDED_volk_32f_convert_64f_a16_H */ +#endif /* INCLUDED_volk_32f_convert_64f_a_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_u.h b/volk/include/volk/volk_32f_convert_64f_u.h index a825767de..387baa3b9 100644 --- a/volk/include/volk/volk_32f_convert_64f_u.h +++ b/volk/include/volk/volk_32f_convert_64f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the float values into double values diff --git a/volk/include/volk/volk_32f_index_max_16u_a16.h b/volk/include/volk/volk_32f_index_max_16u_a.h index d070e17d5..3e0cf1d65 100644 --- a/volk/include/volk/volk_32f_index_max_16u_a16.h +++ b/volk/include/volk/volk_32f_index_max_16u_a.h @@ -1,14 +1,15 @@ -#ifndef INCLUDED_volk_32f_index_max_16u_a16_H -#define INCLUDED_volk_32f_index_max_16u_a16_H +#ifndef INCLUDED_volk_32f_index_max_16u_a_H +#define INCLUDED_volk_32f_index_max_16u_a_H #include <volk/volk_common.h> +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include<smmintrin.h> -static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,8 +26,8 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -63,10 +64,10 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const #endif /*LV_HAVE_SSE4_1*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include<xmmintrin.h> -static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -83,8 +84,8 @@ static inline void 
volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -122,8 +123,8 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_GENERIC -static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32f_index_max_16u_a_generic(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ float max = src0[0]; unsigned int index = 0; @@ -145,4 +146,4 @@ static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, cons #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/ +#endif /*INCLUDED_volk_32f_index_max_16u_a_H*/ diff --git a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h index ff4d5b19c..b25df75a1 100644 --- a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H -#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H +#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector. @@ -14,7 +14,7 @@ \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. 
\param num_noints The number of real values in the input vector. */ -static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -78,7 +78,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector. \param outputVector The byte-aligned vector where the results will be stored. @@ -87,7 +87,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. \param num_points The number of real values in the input vector. 
*/ -static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -117,4 +117,4 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVect -#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h index 168245d65..b1902a8c0 100644 --- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H -#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H +#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the spectral noise floor of an input power spectrum @@ -16,12 +17,12 @@ \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; const float* dataPointsPtr = realDataPoints; - float avgPointsVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4]; __m128 dataPointsVal; __m128 avgPointsVal = _mm_setzero_ps(); @@ -87,7 +88,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no sumMean += avgPointsVector[3]; // Calculate the number of valid bins from the remaning count - float validBinCountVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4]; _mm_store_ps(validBinCountVector, vValidBinCount); float validBinCount = 0; @@ -116,7 +117,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the spectral noise floor of an input power spectrum @@ -127,7 +128,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ float sumMean = 0.0; unsigned int number; // find the sum (for mean), etc @@ -164,4 +165,4 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float -#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h index d6b16e336..0a2b4f0f2 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H -#define INCLUDED_volk_32f_s32f_convert_16i_a16_H +#ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H +#define INCLUDED_volk_32f_s32f_convert_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -13,7 +14,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -44,7 +45,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, con } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -53,7 +54,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -63,7 +64,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); @@ -93,7 +94,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* 
outputVector, cons \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -107,4 +108,4 @@ static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, -#endif /* INCLUDED_volk_32f_s32f_convert_16i_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index 4d306e53c..dec3f1611 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -45,7 +45,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -65,7 +65,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h index ae874fd7b..aa370e614 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h @@ -1,10 +1,47 @@ -#ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H -#define INCLUDED_volk_32f_s32f_convert_32i_a16_H +#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H +#define INCLUDED_volk_32f_s32f_convert_32i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_AVX +#include <immintrin.h> + /*! 
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value + \param inputVector The floating point input data buffer + \param outputVector The 32 bit output data buffer + \param scalar The value multiplied against each point in the input buffer + \param num_points The number of data values to be converted + */ +static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + + const unsigned int eighthPoints = num_points / 8; + + const float* inputVectorPtr = (const float*)inputVector; + int32_t* outputVectorPtr = outputVector; + __m256 vScalar = _mm256_set1_ps(scalar); + __m256 inputVal1; + __m256i intInputVal1; + + for(;number < eighthPoints; number++){ + inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8; + + intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar)); + + _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1); + outputVectorPtr += 8; + } + + number = eighthPoints * 8; + for(; number < num_points; number++){ + outputVector[number] = (int32_t)(inputVector[number] * scalar); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -13,7 +50,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -40,7 +77,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, con } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -49,7 +86,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -59,7 +96,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); @@ -89,7 +126,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* 
outputVector, cons \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -103,4 +140,4 @@ static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, -#endif /* INCLUDED_volk_32f_s32f_convert_32i_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index 561fcd800..b4e954dc4 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -41,7 +41,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -61,7 +61,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h index f64f2a213..8d87a07d7 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H -#define INCLUDED_volk_32f_s32f_convert_8i_a16_H +#ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H +#define INCLUDED_volk_32f_s32f_convert_8i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -13,7 +14,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -51,7 +52,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -60,7 +61,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -70,7 +71,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); @@ -100,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -114,4 +115,4 @@ static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, co -#endif /* INCLUDED_volk_32f_s32f_convert_8i_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h 
b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index 420693571..1c6bf87c9 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -52,7 +52,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -72,7 +72,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_normalize_a16.h b/volk/include/volk/volk_32f_s32f_normalize_a.h index 0850cddf7..f5fd0d1db 100644 --- a/volk/include/volk/volk_32f_s32f_normalize_a16.h +++ b/volk/include/volk/volk_32f_s32f_normalize_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_s32f_normalize_a16_H -#define INCLUDED_volk_32f_s32f_normalize_a16_H +#ifndef INCLUDED_volk_32f_s32f_normalize_a_H +#define INCLUDED_volk_32f_s32f_normalize_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Normalizes all points in the buffer by the scalar value ( divides each data point by the scalar value ) @@ -12,7 +12,7 @@ \param num_points The number of values in vecBuffer \param scalar The scale value to be applied to each buffer value */ -static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; @@ -41,7 +41,7 @@ static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Normalizes the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -49,7 +49,7 @@ static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float \param bVector One of the vectors to be normalizeed \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector */ -static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a_generic(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; const float invScalar = 1.0 / scalar; @@ -60,7 +60,7 @@ static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const f } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Normalizes the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -68,14 +68,14 @@ static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const f \param bVector One of the vectors to be normalizeed \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector */ -extern void volk_32f_s32f_normalize_a16_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points); -static inline void volk_32f_s32f_normalize_a16_orc(float* vecBuffer, const float scalar, unsigned int num_points){ +extern void volk_32f_s32f_normalize_a_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points); +static inline void volk_32f_s32f_normalize_a_orc(float* vecBuffer, const float scalar, unsigned int num_points){ float invscalar = 1.0 / scalar; - volk_32f_s32f_normalize_a16_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); + volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_32f_s32f_normalize_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_normalize_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a16.h b/volk/include/volk/volk_32f_s32f_power_32f_a.h index 3ed594d9a..c4fa31bd1 100644 --- a/volk/include/volk/volk_32f_s32f_power_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_power_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H -#define INCLUDED_volk_32f_s32f_power_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_power_32f_a_H +#define INCLUDED_volk_32f_s32f_power_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <tmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -19,14 +19,14 @@ \param power The power value to be applied to each data point \param 
num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; float* cPtr = cVector; const float* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 zeroValue = _mm_setzero_ps(); __m128 signMask; @@ -62,10 +62,10 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -76,14 +76,14 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; float* cPtr = cVector; const float* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 zeroValue = _mm_setzero_ps(); __m128 signMask; @@ -119,7 +119,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Takes each the input vector value to the specified power and stores the results in the return vector \param cVector The vector where the results will be stored @@ -127,7 +127,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -141,4 +141,4 @@ static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const flo -#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */ diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h index 32f4fa067..881067bdc 100644 --- a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H -#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H +#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H +#define INCLUDED_volk_32f_s32f_stddev_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean @@ -14,7 +15,7 @@ \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -22,7 +23,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal1, aVal2, aVal3, aVal4; @@ -65,7 +66,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean @@ -74,7 +75,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -82,7 +83,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); @@ -111,7 +112,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean \param stddev The calculated standard deviation @@ -119,7 +120,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ const float* aPtr = inputBuffer; @@ -141,4 +142,4 @@ static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const flo -#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */ +#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */ diff --git a/volk/include/volk/volk_32f_sqrt_32f_a16.h b/volk/include/volk/volk_32f_sqrt_32f_a.h index 513c2cffe..e44c73cfd 100644 --- a/volk/include/volk/volk_32f_sqrt_32f_a16.h +++ b/volk/include/volk/volk_32f_sqrt_32f_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_32f_sqrt_32f_a16_H -#define INCLUDED_volk_32f_sqrt_32f_a16_H +#ifndef INCLUDED_volk_32f_sqrt_32f_a_H +#define INCLUDED_volk_32f_sqrt_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Sqrts the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -40,14 +40,14 @@ static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVecto } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Sqrts the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a_generic(float* cVector, const float* aVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -58,20 +58,20 @@ static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aV } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC -extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int); +#ifdef LV_HAVE_ORC +extern void volk_32f_sqrt_32f_a_orc_impl(float *, const float*, unsigned int); /*! 
\brief Sqrts the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_32f_a16_orc(float* cVector, const float* aVector, unsigned int num_points){ - volk_32f_sqrt_32f_a16_orc_impl(cVector, aVector, num_points); +static inline void volk_32f_sqrt_32f_a_orc(float* cVector, const float* aVector, unsigned int num_points){ + volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_sqrt_32f_a16_H */ +#endif /* INCLUDED_volk_32f_sqrt_32f_a_H */ diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h index 278089841..3a82e3d2f 100644 --- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h +++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H -#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H +#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H +#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Calculates the standard deviation and mean of the input buffer @@ -14,7 +15,7 @@ \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -22,8 +23,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo const unsigned int sixteenthPoints = num_points / 16; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -78,7 +79,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the standard deviation and mean of the input buffer @@ -87,7 +88,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -95,8 +96,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -134,7 +135,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the standard deviation and mean of the input buffer \param stddev The calculated standard deviation @@ -142,7 +143,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -166,4 +167,4 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, fl -#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */ +#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */ diff --git a/volk/include/volk/volk_32f_x2_add_32f_a16.h b/volk/include/volk/volk_32f_x2_add_32f_a.h index d0d0e0a0e..3bc83653b 100644 --- a/volk/include/volk/volk_32f_x2_add_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_add_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_add_32f_a16_H -#define INCLUDED_volk_32f_x2_add_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_add_32f_a_H +#define INCLUDED_volk_32f_x2_add_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Adds the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Adds the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVec \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Adds the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -extern void volk_32f_x2_add_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_add_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_add_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_add_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_add_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_add_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_divide_32f_a16.h b/volk/include/volk/volk_32f_x2_divide_32f_a.h index d844e25b0..52ddfae87 100644 --- a/volk/include/volk/volk_32f_x2_divide_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_divide_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_divide_32f_a16_H -#define INCLUDED_volk_32f_x2_divide_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_divide_32f_a_H +#define INCLUDED_volk_32f_x2_divide_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Divides the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* a } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Divides the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* a \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const floa } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Divides the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,12 +71,12 @@ static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const floa \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -extern void volk_32f_x2_divide_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_divide_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_divide_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_divide_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_divide_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_divide_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_divide_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_divide_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h index 61aa56815..0c58f2ecf 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a.h @@ -1,13 +1,14 @@ -#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H -#define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a_H +#define INCLUDED_volk_32f_x2_dot_prod_32f_a_H +#include <volk/volk_common.h> #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_generic(float * result, const float * 
input, const float * taps, unsigned int num_points) { float dotProduct = 0; const float* aPtr = input; @@ -24,10 +25,10 @@ static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const fl #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE -static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const float* input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +54,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -73,11 +74,11 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -102,7 +103,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -120,11 +121,11 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef 
LV_HAVE_SSE4_1 #include <smmintrin.h> -static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -163,7 +164,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; @@ -181,4 +182,4 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a16_H*/ +#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a_H*/ diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h index 8469a3cea..7f47122ff 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h @@ -4,7 +4,7 @@ #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const float * input, const float * taps, unsigned int num_points) { @@ -24,7 +24,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const floa #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* input, const float* taps, unsigned int num_points) { @@ -53,7 +53,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; 
_mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -73,7 +73,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> @@ -102,7 +102,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -120,7 +120,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> @@ -163,7 +163,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; diff --git a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h index 29c9392df..1d4d2dbbd 100644 --- a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h +++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a16_H -#define INCLUDED_volk_32f_x2_interleave_32fc_a16_H +#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H +#define INCLUDED_volk_32f_x2_interleave_32fc_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Interleaves the I & Q vector data into the complex vector @@ -13,7 +13,7 @@ \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ unsigned int number = 0; float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; @@ -48,7 +48,7 @@ static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Interleaves the I & Q vector data into the complex vector. \param iBuffer The I buffer data to be interleaved @@ -56,7 +56,7 @@ static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVec -#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a16_H */ +#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */ diff --git a/volk/include/volk/volk_32f_x2_max_32f_a16.h b/volk/include/volk/volk_32f_x2_max_32f_a.h index 26e7f1246..7948c458d 100644 --- a/volk/include/volk/volk_32f_x2_max_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_max_32f_a.h 
@@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H -#define INCLUDED_volk_32f_x2_max_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_max_32f_a_H +#define INCLUDED_volk_32f_x2_max_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -45,7 +45,7 @@ static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVec \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,7 +67,7 @@ static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -75,11 +75,11 @@ static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_max_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_max_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_max_32f_a_orc_impl(cVector, 
aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_max_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_min_32f_a16.h b/volk/include/volk/volk_32f_x2_min_32f_a.h index 23bae044c..d77134868 100644 --- a/volk/include/volk/volk_32f_x2_min_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_min_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_min_32f_a16_H -#define INCLUDED_volk_32f_x2_min_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_min_32f_a_H +#define INCLUDED_volk_32f_x2_min_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -45,7 +45,7 @@ static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVec \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,7 +67,7 @@ static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -75,11 +75,11 @@ static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -extern void volk_32f_x2_min_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_min_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_min_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_min_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_min_32f_a_orc_impl(cVector, 
aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_min_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_min_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a.h index a0dcfa86e..fae9a652f 100644 --- a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_multiply_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_multiply_32f_a16_H -#define INCLUDED_volk_32f_x2_multiply_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H +#define INCLUDED_volk_32f_x2_multiply_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplys the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,46 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_AVX +#include <immintrin.h> +/*! 
+ \brief Multiplies the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m256 aVal, bVal, cVal; + for(;number < eighthPoints; number++){ + + aVal = _mm256_load_ps(aPtr); + bVal = _mm256_load_ps(bPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_store_ps(cPtr,cVal); // Store the results back into the C container + + aPtr += 8; + bPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_GENERIC /*! 
\brief Multiplys the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +90,7 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +102,7 @@ static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const fl } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Multiplys the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +110,11 @@ static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const fl \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -extern void volk_32f_x2_multiply_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_multiply_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_multiply_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_multiply_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + 
volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_multiply_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h index 30306774d..cc02c3678 100644 --- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h +++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H -#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H +#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H +#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. @@ -14,7 +15,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -62,7 +63,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexV } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 
@@ -72,7 +73,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexV \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -85,7 +86,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ iValue = _mm_load_ps(iBufferPtr); @@ -127,7 +128,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 
\param iBuffer The I buffer data to be interleaved @@ -136,7 +137,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ int16_t* complexVectorPtr = (int16_t*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -152,4 +153,4 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* compl -#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */ +#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */ diff --git a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a.h index 7404bfe79..16cad008a 100644 --- a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_subtract_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32f_x2_subtract_32f_a16_H -#define INCLUDED_volk_32f_x2_subtract_32f_a16_H +#ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H +#define INCLUDED_volk_32f_x2_subtract_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Subtracts bVector form aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Subtracts bVector form aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const fl } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Subtracts bVector form aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const fl \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -extern void volk_32f_x2_subtract_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32f_x2_subtract_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ - volk_32f_x2_subtract_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_subtract_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32f_x2_subtract_32f_a16_H */ +#endif /* INCLUDED_volk_32f_x2_subtract_32f_a_H */ diff --git a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h index af9e39537..2ea8fa96d 100644 --- a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h +++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H -#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H +#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H +#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H #include<inttypes.h> #include<stdio.h> @@ -9,11 +9,11 @@ #define MAX(X,Y) ((X) > (Y)?(X):(Y)) #endif -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* src0, float* 
center_point_array, float* cutoff, unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { float result = 0.0; @@ -98,9 +98,9 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* sr #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { @@ -148,4 +148,4 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H*/ +#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H*/ diff --git a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h index 514998800..b7350b9fa 100644 --- a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h +++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a16_H -#define INCLUDED_volk_32fc_32f_multiply_32fc_a16_H +#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H +#define INCLUDED_volk_32fc_32f_multiply_32fc_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies the input complex vector with the input float vector and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector The vectors containing the float values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -56,7 +56,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector \param cVector The vector where the results will be stored @@ -64,7 +64,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const float* bPtr= bVector; @@ -76,7 +76,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector \param cVector The vector where the results will be stored @@ -84,12 +84,12 @@ static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, c \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -extern void volk_32fc_32f_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points); -static inline void volk_32fc_32f_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ - volk_32fc_32f_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32fc_32f_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ + volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a16_H */ +#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h index 84d2576ed..9de036ef4 100644 --- a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H -#define INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H +#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include 
<xmmintrin.h> /*! \brief Deinterleaves the complex vector into I & Q vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I & Q vector data \param complexVector The complex input vector @@ -57,7 +57,7 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, flo -#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h index 34262a7af..29c369d9a 100644 --- a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h +++ 
b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H -#define INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H +#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, doubl } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the lv_32fc_t vector into double I & Q vector data \param complexVector The complex input vector @@ -59,7 +59,7 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, doubl \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -75,4 +75,4 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, do -#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h index 9838ec88b..a1d0fd5d1 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a16_H -#define INCLUDED_volk_32fc_deinterleave_real_32f_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H +#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Deinterleaves the complex vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -44,14 +44,14 @@ static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; @@ -65,4 +65,4 @@ static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, c -#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h index af392d074..70a3b1971 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a16_H -#define INCLUDED_volk_32fc_deinterleave_real_64f_a16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H 
+#define INCLUDED_volk_32fc_deinterleave_real_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the complex vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -42,14 +42,14 @@ static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, con } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, -#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */ diff --git a/volk/include/volk/volk_32fc_index_max_16u_a16.h b/volk/include/volk/volk_32fc_index_max_16u_a.h index 532ae4e7c..312e034e2 100644 --- a/volk/include/volk/volk_32fc_index_max_16u_a16.h +++ b/volk/include/volk/volk_32fc_index_max_16u_a.h @@ -1,17 +1,17 @@ -#ifndef 
INCLUDED_volk_32fc_index_max_16u_a16_H -#define INCLUDED_volk_32fc_index_max_16u_a16_H +#ifndef INCLUDED_volk_32fc_index_max_16u_a_H +#define INCLUDED_volk_32fc_index_max_16u_a_H #include <volk/volk_common.h> #include<inttypes.h> #include<stdio.h> #include<volk/volk_complex.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { @@ -188,8 +188,8 @@ static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32f #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32fc_index_max_16u_a_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { float sq_dist = 0.0; float max = 0.0; unsigned int index = 0; @@ -212,4 +212,4 @@ static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_ #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32fc_index_max_16u_a16_H*/ +#endif /*INCLUDED_volk_32fc_index_max_16u_a_H*/ diff --git a/volk/include/volk/volk_32fc_magnitude_32f_a16.h b/volk/include/volk/volk_32fc_magnitude_32f_a.h index be7216dce..f18e9bc0b 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_a16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_32fc_magnitude_32f_a16_H -#define INCLUDED_volk_32fc_magnitude_32f_a16_H +#ifndef INCLUDED_volk_32fc_magnitude_32f_a_H +#define INCLUDED_volk_32fc_magnitude_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -13,7 +13,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -48,7 +48,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, cons } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -56,7 +56,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, cons \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -96,14 +96,14 @@ static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -115,18 +115,18 @@ static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_32fc_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); -static inline void volk_32fc_magnitude_32f_a16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ - volk_32fc_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, num_points); +extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); +static inline void volk_32fc_magnitude_32f_a_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* 
INCLUDED_volk_32fc_magnitude_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h index e9f74438d..9304b0c28 100644 --- a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a16_H -#define INCLUDED_volk_32fc_s32f_atan2_32f_a16_H +#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H +#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -19,7 +19,7 @@ \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. */ -static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -27,7 +27,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con const unsigned int quarterPoints = num_points / 4; const float invNormalizeFactor = 1.0 / normalizeFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 testVector = _mm_set_ps1(2*M_PI); __m128 correctVector = _mm_set_ps1(M_PI); __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor); @@ -67,10 +67,10 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH 
+#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -81,7 +81,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. */ -static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -89,7 +89,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const const unsigned int quarterPoints = num_points / 4; const float invNormalizeFactor = 1.0 / normalizeFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 testVector = _mm_set_ps1(2*M_PI); __m128 correctVector = _mm_set_ps1(M_PI); __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor); @@ -131,7 +131,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief performs the atan2 on the input vector and stores the results in the output vector. \param outputVector The vector where the results will be stored. @@ -139,7 +139,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. 
*/ -static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ float* outPtr = outputVector; const float* inPtr = (float*)inputVector; const float invNormalizeFactor = 1.0 / normalizeFactor; @@ -155,4 +155,4 @@ static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, con -#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h index 31465bff9..1c17fb70c 100644 --- a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H -#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H +#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H +#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Deinterleaves the complex vector, multiply the value by the scalar, convert to 16t, and in I vector data @@ -13,7 +14,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -24,7 +25,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer __m128 cplxValue1, cplxValue2, iValue; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -54,7 +55,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex vector, multiply the value by the scalar, convert to 16t, and in I vector data \param complexVector The complex input vector @@ -62,7 +63,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* iBufferPtr = iBuffer; unsigned int number = 0; @@ -77,4 +78,4 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBu -#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h index 530359600..38fd609d3 100644 --- a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a.h @@ -1,11 +1,12 @@ -#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H -#define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H +#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H +#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector @@ -14,7 +15,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -25,7 +26,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto __m128 cplxValue1, cplxValue2, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -60,7 +61,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector @@ -69,7 +70,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -80,7 +81,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -120,7 +121,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -128,7 +129,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -140,7 +141,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVe } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -148,11 +149,11 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVe \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -extern void volk_32fc_s32f_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); -static inline void volk_32fc_s32f_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ - volk_32fc_s32f_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +extern void volk_32fc_s32f_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_32fc_s32f_magnitude_16i_a_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ + volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, scalar, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h index 3507fdb3c..ec1d7167f 100644 --- a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h +++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a.h @@ -1,13 +1,21 @@ -#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a16_H -#define INCLUDED_volk_32fc_s32f_power_32fc_a16_H +#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a_H +#define INCLUDED_volk_32fc_s32f_power_32fc_a_H #include <inttypes.h> #include <stdio.h> +#include <math.h> -#if LV_HAVE_SSE +//! 
raise a complex float to a real float power +static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, const float power){ + const float arg = power*atan2f(lv_creal(exp), lv_cimag(exp)); + const float mag = powf(lv_creal(exp)*lv_creal(exp) + lv_cimag(exp)*lv_cimag(exp), power/2); + return mag*lv_cmake(cosf(arg), sinf(arg)); +} + +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -18,14 +26,14 @@ \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue; @@ -72,16 +80,13 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l number = quarterPoints * 4; #endif /* LV_HAVE_LIB_SIMDMATH */ - lv_32fc_t complexPower; - ((float*)&complexPower)[0] = power; - ((float*)&complexPower)[1] = 0; for(;number < num_points; number++){ - *cPtr++ = lv_cpow((*aPtr++), complexPower); + *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power); } } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Takes each the input complex vector value to the specified power and stores the results in the return vector \param cVector The vector where the results will be stored @@ -89,16 +94,13 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; unsigned int number = 0; - lv_32fc_t complexPower; - ((float*)&complexPower)[0] = power; - ((float*)&complexPower)[1] = 0.0; for(number = 0; number < num_points; number++){ - *cPtr++ = lv_cpow((*aPtr++), complexPower); + *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power); } } #endif /* LV_HAVE_GENERIC */ @@ -106,4 +108,4 @@ static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, con -#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h index 39d8f7aa2..8d1959dae 100644 --- a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H -#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H +#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H +#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -#if 
LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -19,12 +19,12 @@ \param normalizationFactor This value is divided against all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; const float iNormalizationFactor = 1.0 / normalizationFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 magScalar = _mm_set_ps1(10.0); magScalar = _mm_div_ps(magScalar, logf4(magScalar)); @@ -88,7 +88,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the log10 power value for each input point \param logPowerOutput The 10.0 * log10(r*r + i*i) for each data point @@ -96,7 +96,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut \param normalizationFactor This value is divided agains all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_power_spectrum_32f_a_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -123,4 +123,4 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPower -#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h index 0120b5307..fc635f171 100644 --- a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H -#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H +#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H +#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -20,14 +20,14 @@ \param 
rbw The resolution bandwith of the fft spectrum \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; const float iRBW = 1.0 / rbw; const float iNormalizationFactor = 1.0 / normalizationFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 magScalar = _mm_set_ps1(10.0); magScalar = _mm_div_ps(magScalar, logf4(magScalar)); @@ -94,7 +94,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the log10 power value divided by the RBW for each input point \param logPowerOutput The 10.0 * log10((r*r + i*i)/RBW) for each data point @@ -103,7 +103,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* \param rbw The resolution bandwith of the fft spectrum \param num_points The number of fft data points */ -static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -131,4 +131,4 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(floa -#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H */ +#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */ diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h index a01971df3..a6c21336d 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a.h @@ -1,14 +1,15 @@ -#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H -#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H +#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H +#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H +#include <volk/volk_common.h> #include<volk/volk_complex.h> #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int 
num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -62,9 +63,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* r #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -203,9 +204,9 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* resul #endif #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; int bound = num_bytes >> 4; int leftovers = num_bytes % 16; @@ -341,4 +342,4 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* re -#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H*/ diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h 
b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h index 2fa5918cc..6b22d9f81 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -5,7 +5,7 @@ #include<volk/volk_complex.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { @@ -57,7 +57,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <xmmintrin.h> #include <pmmintrin.h> @@ -66,7 +66,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; union HalfMask { uint32_t intRep[4]; @@ -131,7 +131,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result Isum += Im; } - result[0] = lv_32fc_init(Rsum,Isum); + result[0] = lv_cmake(Rsum,Isum); return; } diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h index 9a7b65ab4..022a0a614 100644 --- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a.h @@ -1,15 +1,16 @@ -#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H -#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H +#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H +#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H +#include <volk/volk_common.h> #include <volk/volk_complex.h> #include <stdio.h> #include <string.h> -#if 
LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -59,7 +60,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, con #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm @@ -194,7 +195,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, cons #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm volatile ( @@ -316,11 +317,11 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, cons #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { lv_32fc_t dotProduct; @@ -358,7 +359,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const b += 2; } - lv_32fc_t dotProductVector[2] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) lv_32fc_t 
dotProductVector[2]; _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -373,12 +374,12 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> -static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - volk_32fc_x2_dot_prod_32fc_a16_sse3(result, input, taps, num_bytes); +static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { + volk_32fc_x2_dot_prod_32fc_a_sse3(result, input, taps, num_bytes); // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now /* __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1; @@ -465,4 +466,4 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, cons #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H*/ diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h index b4214f5d2..18dd092e8 100644 --- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h @@ -1,12 +1,12 @@ -#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a16_H -#define INCLUDED_volk_32fc_x2_multiply_32fc_a16_H +#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H +#define INCLUDED_volk_32fc_x2_multiply_32fc_a_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> #include <float.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Multiplies the two input complex vectors and stores their results in the third vector @@ -15,7 +15,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -53,7 +53,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplies the two input complex vectors and stores their results in the third vector \param cVector The vector where the results will be stored @@ -61,7 +61,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const lv_32fc_t* bPtr= bVector; @@ -73,7 +73,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, co } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Multiplies the two input complex vectors and stores their results in the third vector \param cVector The vector where the results will be stored @@ -81,9 +81,9 @@ static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, co \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -extern void volk_32fc_x2_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); -static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ - volk_32fc_x2_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); +static inline void volk_32fc_x2_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points); } #endif /* LV_HAVE_ORC */ @@ -91,4 +91,4 @@ static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const -#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a16_H */ +#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */ diff --git a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h index 6a863b16d..be7a4ffe9 100644 --- a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h +++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a.h @@ -1,16 +1,16 @@ -#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H -#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H +#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H +#define 
INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H #include<inttypes.h> #include<stdio.h> #include<volk/volk_complex.h> #include <string.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; @@ -105,8 +105,8 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -123,4 +123,4 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(flo #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H*/ diff --git a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h index 406097fc8..c21d00491 100644 --- a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h +++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a.h @@ -1,15 +1,15 @@ -#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a16_H -#define INCLUDED_volk_32fc_x2_square_dist_32f_a16_H +#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a_H +#define INCLUDED_volk_32fc_x2_square_dist_32f_a_H #include<inttypes.h> #include<stdio.h> #include<volk/volk_complex.h> 
-#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { +static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -91,8 +91,8 @@ static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_ #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC -static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { +#ifdef LV_HAVE_GENERIC +static inline void volk_32fc_x2_square_dist_32f_a_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -109,4 +109,4 @@ static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32 #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a16_H*/ +#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a_H*/ diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a.h index 0fcadd9cb..558142869 100644 --- a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32i_s32f_convert_32f_a16_H -#define INCLUDED_volk_32i_s32f_convert_32f_a16_H +#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H +#define INCLUDED_volk_32i_s32f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
@@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -47,7 +47,7 @@ static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 32 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 32 bit input data buffer @@ -55,7 +55,7 @@ static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int32_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -70,4 +70,4 @@ static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, co -#endif /* INCLUDED_volk_32i_s32f_convert_32f_a16_H */ +#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */ diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_u.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h index 1dd6422f8..d8afd218c 100644 --- a/volk/include/volk/volk_32i_s32f_convert_32f_u.h +++ 
b/volk/include/volk/volk_32i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -48,7 +48,7 @@ static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const i #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 32 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 32 bit input data buffer diff --git a/volk/include/volk/volk_32i_x2_and_32i_a16.h b/volk/include/volk/volk_32i_x2_and_32i_a.h index 3baa1d856..dcd63d98e 100644 --- a/volk/include/volk/volk_32i_x2_and_32i_a16.h +++ b/volk/include/volk/volk_32i_x2_and_32i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32i_x2_and_32i_a16_H -#define INCLUDED_volk_32i_x2_and_32i_a16_H +#ifndef INCLUDED_volk_32i_x2_and_32i_a_H +#define INCLUDED_volk_32i_x2_and_32i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Ands the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Ands the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32 } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Ands the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32 \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -extern void volk_32i_x2_and_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); -static inline void volk_32i_x2_and_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ - volk_32i_x2_and_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_and_32i_a_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, 
num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32i_x2_and_32i_a16_H */ +#endif /* INCLUDED_volk_32i_x2_and_32i_a_H */ diff --git a/volk/include/volk/volk_32i_x2_or_32i_a16.h b/volk/include/volk/volk_32i_x2_or_32i_a.h index 0be22f00a..243e8178c 100644 --- a/volk/include/volk/volk_32i_x2_or_32i_a16.h +++ b/volk/include/volk/volk_32i_x2_or_32i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32i_x2_or_32i_a16_H -#define INCLUDED_volk_32i_x2_or_32i_a16_H +#ifndef INCLUDED_volk_32i_x2_or_32i_a_H +#define INCLUDED_volk_32i_x2_or_32i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Ors the two input vectors and store their results in the third vector @@ -13,7 +13,7 @@ \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -43,7 +43,7 @@ static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* a } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Ors the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -51,7 +51,7 @@ static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* a \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,7 @@ static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_ } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Ors the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -71,11 +71,11 @@ static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_ \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -extern void volk_32i_x2_or_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); -static inline void volk_32i_x2_or_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ - volk_32i_x2_or_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_or_32i_a_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, 
bVector, num_points); } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_32i_x2_or_32i_a16_H */ +#endif /* INCLUDED_volk_32i_x2_or_32i_a_H */ diff --git a/volk/include/volk/volk_32u_byteswap_a16.h b/volk/include/volk/volk_32u_byteswap_a.h index 7556ec7b1..b88848096 100644 --- a/volk/include/volk/volk_32u_byteswap_a16.h +++ b/volk/include/volk/volk_32u_byteswap_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_32u_byteswap_a16_H -#define INCLUDED_volk_32u_byteswap_a16_H +#ifndef INCLUDED_volk_32u_byteswap_a_H +#define INCLUDED_volk_32u_byteswap_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint32_t* inputPtr = intsToSwap; @@ -51,13 +51,13 @@ static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int32_t's. 
\param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = intsToSwap; unsigned int point; @@ -74,4 +74,4 @@ static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned -#endif /* INCLUDED_volk_32u_byteswap_a16_H */ +#endif /* INCLUDED_volk_32u_byteswap_a_H */ diff --git a/volk/include/volk/volk_32u_popcnt_a16.h b/volk/include/volk/volk_32u_popcnt_a.h index f6e25e4e8..b72d605c6 100644 --- a/volk/include/volk/volk_32u_popcnt_a16.h +++ b/volk/include/volk/volk_32u_popcnt_a.h @@ -5,9 +5,9 @@ #include <inttypes.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a_generic(uint32_t* ret, const uint32_t value) { // This is faster than a lookup table uint32_t retVal = value; @@ -23,11 +23,11 @@ static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t val #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE4_2 +#ifdef LV_HAVE_SSE4_2 #include <nmmintrin.h> -static inline void volk_32u_popcnt_a16_sse4_2(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t value) { *ret = _mm_popcnt_u32(value); } diff --git a/volk/include/volk/volk_64f_convert_32f_a16.h b/volk/include/volk/volk_64f_convert_32f_a.h index 7dca065f0..2126e4f95 100644 --- a/volk/include/volk/volk_64f_convert_32f_a16.h +++ b/volk/include/volk/volk_64f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64f_convert_32f_a16_H -#define INCLUDED_volk_64f_convert_32f_a16_H +#ifndef INCLUDED_volk_64f_convert_32f_a_H +#define INCLUDED_volk_64f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include 
<emmintrin.h> /*! \brief Converts the double values into float values @@ -12,7 +12,7 @@ \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const doub \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a_generic(float* outputVector, const double* inputVector, unsigned int num_points){ float* outputVectorPtr = outputVector; const double* inputVectorPtr = inputVector; unsigned int number = 0; @@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const d -#endif /* INCLUDED_volk_64f_convert_32f_a16_H */ +#endif /* INCLUDED_volk_64f_convert_32f_a_H */ diff --git a/volk/include/volk/volk_64f_convert_32f_u.h b/volk/include/volk/volk_64f_convert_32f_u.h index 6338c1433..5c323230a 100644 --- a/volk/include/volk/volk_64f_convert_32f_u.h +++ b/volk/include/volk/volk_64f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the double values into float values diff --git a/volk/include/volk/volk_64f_x2_max_64f_a16.h b/volk/include/volk/volk_64f_x2_max_64f_a.h index 4b0c1f5f1..61a704c52 100644 --- a/volk/include/volk/volk_64f_x2_max_64f_a16.h +++ b/volk/include/volk/volk_64f_x2_max_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64f_x2_max_64f_a16_H -#define INCLUDED_volk_64f_x2_max_64f_a16_H +#ifndef INCLUDED_volk_64f_x2_max_64f_a_H +#define INCLUDED_volk_64f_x2_max_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -45,7 +45,7 @@ static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* a } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* a \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_64f_x2_max_64f_a16_H */ +#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */ diff --git a/volk/include/volk/volk_64f_x2_min_64f_a16.h b/volk/include/volk/volk_64f_x2_min_64f_a.h index aa961e384..148b72c59 100644 --- a/volk/include/volk/volk_64f_x2_min_64f_a16.h +++ b/volk/include/volk/volk_64f_x2_min_64f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64f_x2_min_64f_a16_H -#define INCLUDED_volk_64f_x2_min_64f_a16_H +#ifndef INCLUDED_volk_64f_x2_min_64f_a_H +#define INCLUDED_volk_64f_x2_min_64f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -45,7 +45,7 @@ static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* a } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -53,7 +53,7 @@ static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* a \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_64f_x2_min_64f_a16_H */ +#endif /* INCLUDED_volk_64f_x2_min_64f_a_H */ diff --git a/volk/include/volk/volk_64u_byteswap_a16.h b/volk/include/volk/volk_64u_byteswap_a.h index 0eefe0138..d4fc74a6e 100644 --- a/volk/include/volk/volk_64u_byteswap_a16.h +++ 
b/volk/include/volk/volk_64u_byteswap_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_64u_byteswap_a16_H -#define INCLUDED_volk_64u_byteswap_a16_H +#ifndef INCLUDED_volk_64u_byteswap_a_H +#define INCLUDED_volk_64u_byteswap_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int num_points){ +static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; __m128i input, byte1, byte2, byte3, byte4, output; __m128i byte2mask = _mm_set1_epi32(0x00FF0000); @@ -59,13 +59,13 @@ static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int64_t's. 
\param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned int num_points){ +static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; unsigned int point; for(point = 0; point < num_points; point++){ @@ -85,4 +85,4 @@ static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned -#endif /* INCLUDED_volk_64u_byteswap_a16_H */ +#endif /* INCLUDED_volk_64u_byteswap_a_H */ diff --git a/volk/include/volk/volk_64u_popcnt_a16.h b/volk/include/volk/volk_64u_popcnt_a.h index 59511dc29..bdaa98643 100644 --- a/volk/include/volk/volk_64u_popcnt_a16.h +++ b/volk/include/volk/volk_64u_popcnt_a.h @@ -1,14 +1,14 @@ -#ifndef INCLUDED_volk_64u_popcnt_a16_H -#define INCLUDED_volk_64u_popcnt_a16_H +#ifndef INCLUDED_volk_64u_popcnt_a_H +#define INCLUDED_volk_64u_popcnt_a_H #include <stdio.h> #include <inttypes.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC -static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) { const uint32_t* valueVector = (const uint32_t*)&value; @@ -40,11 +40,11 @@ static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t val #include <nmmintrin.h> -static inline void volk_64u_popcnt_a16_sse4_2(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value) { *ret = _mm_popcnt_u64(value); } #endif /*LV_HAVE_SSE4_2*/ -#endif /*INCLUDED_volk_64u_popcnt_a16_H*/ +#endif /*INCLUDED_volk_64u_popcnt_a_H*/ diff --git a/volk/include/volk/volk_8i_convert_16i_a16.h b/volk/include/volk/volk_8i_convert_16i_a.h index 3d7045753..9104f90cb 100644 --- a/volk/include/volk/volk_8i_convert_16i_a16.h +++ b/volk/include/volk/volk_8i_convert_16i_a.h @@ -1,10 +1,10 @@ -#ifndef 
INCLUDED_volk_8i_convert_16i_a16_H -#define INCLUDED_volk_8i_convert_16i_a16_H +#ifndef INCLUDED_volk_8i_convert_16i_a_H +#define INCLUDED_volk_8i_convert_16i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -13,7 +13,7 @@ \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -47,14 +47,14 @@ static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const i } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -65,16 +65,16 @@ static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -extern void volk_8i_convert_16i_a16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); -static inline void volk_8i_convert_16i_a16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ - volk_8i_convert_16i_a16_orc_impl(outputVector, inputVector, num_points); +extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); +static inline void volk_8i_convert_16i_a_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ + volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points); } #endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8i_convert_16i_u.h b/volk/include/volk/volk_8i_convert_16i_u.h index bcff13406..7d7104f52 100644 --- a/volk/include/volk/volk_8i_convert_16i_u.h +++ b/volk/include/volk/volk_8i_convert_16i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -48,7 +48,7 @@ static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a.h index 99a24ec10..7f2623ac6 100644 --- a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_8i_s32f_convert_32f_a16_H -#define INCLUDED_volk_8i_s32f_convert_32f_a16_H +#ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H +#define INCLUDED_volk_8i_s32f_convert_32f_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -66,7 +66,7 @@ static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, cons } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer @@ -74,7 +74,7 @@ static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, cons \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -86,7 +86,7 @@ static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, con } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer @@ -94,10 +94,10 @@ static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, con \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -extern void volk_8i_s32f_convert_32f_a16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); -static inline void volk_8i_s32f_convert_32f_a16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); +static inline void volk_8i_s32f_convert_32f_a_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float invscalar = 1.0 / scalar; - volk_8i_s32f_convert_32f_a16_orc_impl(outputVector, inputVector, invscalar, num_points); + volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points); } #endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_u.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h index 1e30957e8..3cd6bb67c 100644 --- a/volk/include/volk/volk_8i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -67,7 +67,7 @@ static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h index 91c9b2c58..8f13da32f 100644 --- a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H -#define INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H +#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H +#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -51,7 +51,7 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data \param complexVector The complex input vector @@ -59,7 +59,7 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -74,4 +74,4 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, in -#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */ diff --git a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a.h index bf3dc20dd..d26b3d0d0 100644 --- a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a.h @@ -1,10 +1,10 @@ -#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a16_H -#define INCLUDED_volk_8ic_deinterleave_real_16i_a16_H +#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H +#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Deinterleaves the complex 8 bit vector into I 16 bit vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -42,14 +42,14 @@ static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, c #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_16i_a_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, -#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */ diff --git a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a.h index 13de79423..21efed83e 100644 --- a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include 
<tmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I vector data @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -43,14 +43,14 @@ static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, cons } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I vector data \param complexVector The complex input vector \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h index 22c3ebb23..b723c6f8b 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H -#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H +#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if 
LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data @@ -14,7 +15,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -74,7 +75,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data @@ -84,7 +85,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -95,7 +96,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo __m128 invScalar = _mm_set_ps1(1.0/scalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ floatBuffer[0] = 
(float)(complexVectorPtr[0]); @@ -136,7 +137,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data \param complexVector The complex input vector @@ -145,7 +146,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -161,4 +162,4 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, -#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H */ diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h index 5f1430394..74073f5a6 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a.h @@ -1,10 +1,11 @@ -#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H -#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H +#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Deinterleaves the complex 8 bit vector into I float vector data @@ -13,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -61,7 +62,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I float vector data @@ -70,7 +71,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -81,7 +82,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c __m128 invScalar = _mm_set_ps1(iScalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; @@ -107,7 +108,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c } #endif /* 
LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I float vector data \param complexVector The complex input vector @@ -115,7 +116,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; @@ -130,4 +131,4 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffe -#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */ diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h index d9cacbf46..0bb76f1d1 100644 --- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h +++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H -#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H +#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H +#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector @@ -14,7 +14,7 @@ \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -23,7 +23,6 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe const lv_8sc_t* a = aVector; const lv_8sc_t* b = bVector; __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); - const int shuffleMask = _MM_SHUFFLE(2,3,0,1); for(;number < quarterPoints; number++){ // Convert into 8 bit values into 16 bit values @@ -37,7 +36,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe y = _mm_sign_epi16(y, conjugateSign); // Shift the order of the cr and ci values - y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); + y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); // Calculate the ar*(-ci) + cr*(ai) imagz = _mm_madd_epi16(x,y); @@ -56,10 +55,10 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe for(; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, 
-bImag ); lv_32fc_t temp = aVal * bVal; *c16Ptr++ = (int16_t)lv_creal(temp); @@ -68,7 +67,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector \param cVector The complex vector where the results will be stored @@ -76,7 +75,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; int16_t* c16Ptr = (int16_t*)cVector; int8_t* a8Ptr = (int8_t*)aVector; @@ -84,10 +83,10 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cV for(number =0; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *c16Ptr++ = (int16_t)lv_creal(temp); @@ -99,4 +98,4 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cV -#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */ +#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */ diff --git 
a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h index 6ec923a4f..3e05608a4 100644 --- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h +++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a.h @@ -1,11 +1,11 @@ -#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H -#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H +#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H +#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector @@ -14,7 +14,7 @@ \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -24,7 +24,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t const lv_8sc_t* a = aVector; const lv_8sc_t* b = bVector; __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); - const int shuffleMask = _MM_SHUFFLE(2,3,0,1); + __m128 invScalar = _mm_set_ps1(1.0/scalar); for(;number < quarterPoints; number++){ @@ -39,7 +39,7 @@ static inline void 
volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t y = _mm_sign_epi16(y, conjugateSign); // Shift the order of the cr and ci values - y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); + y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); // Calculate the ar*(-ci) + cr*(ai) imagz = _mm_madd_epi16(x,y); @@ -75,10 +75,10 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t for(; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *cFloatPtr++ = lv_creal(temp) / scalar; @@ -87,7 +87,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector \param cVector The complex vector where the results will be stored @@ -95,7 +95,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; float* cPtr = (float*)cVector; const float invScalar = 1.0 / scalar; @@ -104,10 +104,10 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_ for(number = 0; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *cPtr++ = (lv_creal(temp) * invScalar); @@ -119,4 +119,4 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_ -#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H */ +#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */ diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h index 6f444ad89..2c935d1fb 100644 --- a/volk/include/volk/volk_common.h +++ b/volk/include/volk/volk_common.h @@ -1,18 +1,94 @@ -#ifndef 
INCLUDED_LIBVECTOR_COMMON_H -#define INCLUDED_LIBVECTOR_COMMON_H +#ifndef INCLUDED_LIBVOLK_COMMON_H +#define INCLUDED_LIBVOLK_COMMON_H + +//////////////////////////////////////////////////////////////////////// +// Cross-platform attribute macros +//////////////////////////////////////////////////////////////////////// +#if defined __GNUC__ +# define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x))) +# define __VOLK_ATTR_UNUSED __attribute__((unused)) +# define __VOLK_ATTR_INLINE __attribute__((always_inline)) +# define __VOLK_ATTR_DEPRECATED __attribute__((deprecated)) +# if __GNUC__ >= 4 +# define __VOLK_ATTR_EXPORT __attribute__((visibility("default"))) +# define __VOLK_ATTR_IMPORT __attribute__((visibility("default"))) +# else +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +# endif +#elif _MSC_VER +# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x)) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE __forceinline +# define __VOLK_ATTR_DEPRECATED __declspec(deprecated) +# define __VOLK_ATTR_EXPORT __declspec(dllexport) +# define __VOLK_ATTR_IMPORT __declspec(dllimport) +#else +# define __VOLK_ATTR_ALIGNED(x) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE +# define __VOLK_ATTR_DEPRECATED +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// Ignore annoying warnings in MSVC +//////////////////////////////////////////////////////////////////////// +#if defined(_MSC_VER) +# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data +# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2' +#endif + +//////////////////////////////////////////////////////////////////////// +// C-linkage declaration macros +// FIXME: due to the usage of complex.h, require gcc for c-linkage +//////////////////////////////////////////////////////////////////////// +#if defined(__cplusplus) && 
(__GNUC__) +# define __VOLK_DECL_BEGIN extern "C" { +# define __VOLK_DECL_END } +#else +# define __VOLK_DECL_BEGIN +# define __VOLK_DECL_END +#endif + +//////////////////////////////////////////////////////////////////////// +// Define VOLK_API for library symbols +// http://gcc.gnu.org/wiki/Visibility +//////////////////////////////////////////////////////////////////////// +#ifdef volk_EXPORTS +# define VOLK_API __VOLK_ATTR_EXPORT +#else +# define VOLK_API __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// The bit128 union used by some +//////////////////////////////////////////////////////////////////////// +#include <inttypes.h> + +#ifdef LV_HAVE_SSE +#include <xmmintrin.h> +#endif + +#ifdef LV_HAVE_SSE2 +#include <emmintrin.h> +#endif -#include<inttypes.h> -#if LV_HAVE_MMX -#include<xmmintrin.h> union bit128{ uint16_t i16[8]; uint32_t i[4]; float f[4]; double d[2]; - __m128i int_vec; + + #ifdef LV_HAVE_SSE __m128 float_vec; + #endif + + #ifdef LV_HAVE_SSE2 + __m128i int_vec; __m128d double_vec; + #endif }; -#endif /*LV_HAVE_MMX*/ -#endif /*INCLUDED_LIBVECTOR_COMMON_H*/ +#endif /*INCLUDED_LIBVOLK_COMMON_H*/ diff --git a/volk/include/volk/volk_complex.h b/volk/include/volk/volk_complex.h index b20b5cf47..5bd925044 100644 --- a/volk/include/volk/volk_complex.h +++ b/volk/include/volk/volk_complex.h @@ -2,8 +2,21 @@ #define INCLUDE_VOLK_COMPLEX_H /*! - \brief This header file is to prevent issues with having <complex> and <complex.h> variables in the same code as the gcc compiler does not allow that -*/ + * \brief Provide typedefs and operators for all complex types in C and C++. + * + * The typedefs encompass all signed integer and floating point types. + * Each operator function is intended to work across all data types. + * Under C++, these operators are defined as inline templates. + * Under C, these operators are defined as preprocessor macros. 
+ * The use of macros makes the operators agnostic to the type. + * + * The following operator functions are defined: + * - lv_cmake - make a complex type from components + * - lv_creal - get the real part of the complex number + * - lv_cimag - get the imaginary part of the complex number + * - lv_conj - take the conjugate of the complex number + */ + #ifdef __cplusplus #include <complex> @@ -12,60 +25,62 @@ typedef std::complex<int8_t> lv_8sc_t; typedef std::complex<int16_t> lv_16sc_t; typedef std::complex<int32_t> lv_32sc_t; +typedef std::complex<int64_t> lv_64sc_t; typedef std::complex<float> lv_32fc_t; typedef std::complex<double> lv_64fc_t; -static inline float lv_creal(const lv_32fc_t x){ - return x.real(); -} - -static inline float lv_cimag(const lv_32fc_t x){ - return x.imag(); +template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){ + return std::complex<T>(r, i); } -static inline lv_32fc_t lv_conj(const lv_32fc_t x){ - return std::conj(x); +template <typename T> inline typename T::value_type lv_creal(const T &x){ + return x.real(); } -static inline lv_32fc_t lv_cpow(const lv_32fc_t x, const lv_32fc_t y){ - return std::pow(x, y); +template <typename T> inline typename T::value_type lv_cimag(const T &x){ + return x.imag(); } -static inline lv_32fc_t lv_32fc_init(const float x, const float y){ - return std::complex<float>(x,y); +template <typename T> inline T lv_conj(const T &x){ + return std::conj(x); } -#else +#else /* __cplusplus */ #include <complex.h> typedef char complex lv_8sc_t; typedef short complex lv_16sc_t; -typedef int complex lv_32sc_t; +typedef long complex lv_32sc_t; +typedef long long complex lv_64sc_t; typedef float complex lv_32fc_t; typedef double complex lv_64fc_t; -static inline float lv_creal(const lv_32fc_t x){ - return creal(x); -} +#define lv_cmake(r, i) ((r) + _Complex_I*(i)) -static inline float lv_cimag(const lv_32fc_t x){ - return cimag(x); -} +// When GNUC is available, use the complex extensions. 
+// The extensions always return the correct value type. +// http://gcc.gnu.org/onlinedocs/gcc/Complex.html +#ifdef __GNUC__ -static inline lv_32fc_t lv_conj(const lv_32fc_t x){ - return conj(x); -} +#define lv_creal(x) (__real__(x)) -static inline lv_32fc_t lv_cpow(const lv_32fc_t x, const lv_32fc_t y){ - return cpow(x, y); -} +#define lv_cimag(x) (__imag__(x)) -static inline lv_32fc_t lv_32fc_init(const float x, const float y){ - return x + I*y; -} +#define lv_conj(x) (~(x)) + +// When not available, use the c99 complex function family, +// which always returns double regardless of the input type. +#else /* __GNUC__ */ + +#define lv_creal(x) (creal(x)) + +#define lv_cimag(x) (cimag(x)) + +#define lv_conj(x) (conj(x)) -#endif +#endif /* __GNUC__ */ +#endif /* __cplusplus */ #endif /* INCLUDE_VOLK_COMPLEX_H */ diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h new file mode 100644 index 000000000..2a7f7e79f --- /dev/null +++ b/volk/include/volk/volk_prefs.h @@ -0,0 +1,25 @@ +#ifndef INCLUDED_VOLK_PREFS_H +#define INCLUDED_VOLK_PREFS_H + +#include <volk/volk_common.h> + +__VOLK_DECL_BEGIN + +struct VOLK_API volk_arch_pref { + char name[128]; + char arch[32]; +}; + +//////////////////////////////////////////////////////////////////////// +// get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +VOLK_API void get_config_path(char *); + +//////////////////////////////////////////////////////////////////////// +// load prefs into global prefs struct +//////////////////////////////////////////////////////////////////////// +VOLK_API int load_preferences(struct volk_arch_pref **); + +__VOLK_DECL_END + +#endif //INCLUDED_VOLK_PREFS_H diff --git a/volk/include/volk/volk_regexp.py b/volk/include/volk/volk_regexp.py deleted file mode 100644 index 7b695cb3b..000000000 --- a/volk/include/volk/volk_regexp.py +++ /dev/null @@ -1,8 +0,0 @@ -import re - -remove_after_underscore = 
re.compile("_.*"); -space_remove = re.compile(" "); -leading_space_remove = re.compile("^ *"); -replace_arch = re.compile(", const char\* arch"); -replace_bracket = re.compile(" {"); -replace_volk = re.compile("volk"); diff --git a/volk/include/volk/volk_register.py b/volk/include/volk/volk_register.py deleted file mode 100755 index bc8f959af..000000000 --- a/volk/include/volk/volk_register.py +++ /dev/null @@ -1,278 +0,0 @@ -#! /usr/bin/env python - -import sys -import re -import string -from xml.dom import minidom -from volk_regexp import * -from make_cpuid_x86_c import make_cpuid_x86_c -from make_cpuid_h import make_cpuid_h -from make_proccpu_sim import make_proccpu_sim -from make_set_simd import make_set_simd -from make_cpuid_generic_c import make_cpuid_generic_c -from make_cpuid_powerpc_c import make_cpuid_powerpc_c -from make_registry import make_registry -from make_h import make_h -from make_init_h import make_init_h -from make_config_fixed import make_config_fixed -from make_config_in import make_config_in -from make_c import make_c -from make_runtime_c import make_runtime_c -from make_init_c import make_init_c -from make_runtime import make_runtime -from make_typedefs import make_typedefs -from make_mktables import make_mktables -from make_environment_init_c import make_environment_init_c -from make_environment_init_h import make_environment_init_h - -outfile_set_simd = open("../../config/lv_set_simd_flags.m4", "w"); -outfile_reg = open("volk_registry.h", "w"); -outfile_h = open("volk.h", "w"); -outfile_c = open("../../lib/volk.c", "w"); -outfile_runtime = open("volk_runtime.h", "w"); -outfile_runtime_c = open("../../lib/volk_runtime.c", "w"); -outfile_typedefs = open("volk_typedefs.h", "w"); -outfile_init_h = open("../../lib/volk_init.h", "w"); -outfile_init_c = open("../../lib/volk_init.c", "w"); -outfile_cpu_h = open("volk_cpu.h", "w"); -outfile_cpu_x86_c = open("../../lib/volk_cpu_x86.c", "w"); -outfile_cpu_generic_c = 
open("../../lib/volk_cpu_generic.c", "w"); -outfile_cpu_powerpc_c = open("../../lib/volk_cpu_powerpc.c", "w"); -outfile_proccpu_sim = open("../../lib/volk_proccpu_sim.c", "w"); -outfile_config_in = open("../../volk_config.h.in", "w"); -outfile_config_fixed = open("volk_config_fixed.h", "w"); -outfile_mktables = open("../../lib/volk_mktables.c", "w"); -outfile_environment_c = open("../../lib/volk_environment_init.c", "w"); -outfile_environment_h = open("volk_environment_init.h", "w"); -infile = open("Makefile.am", "r"); - - -mfile = infile.readlines(); - -datatypes = []; -functions = []; - - - -for line in mfile: - subline = re.search(".*_(a16|u)\.h.*", line); - if subline: - subsubline = re.search("(?<=volk_).*", subline.group(0)); - if subsubline: - dtype = remove_after_underscore.sub("", subsubline.group(0)); - subdtype = re.search("[0-9]+[A-z]+", dtype); - if subdtype: - datatypes.append(subdtype.group(0)); - - -datatypes = set(datatypes); - -for line in mfile: - for dt in datatypes: - if dt in line: - subline = re.search("(volk_" + dt +"_.*(a16|u).*\.h)", line); - if subline: - - subsubline = re.search(".+(?=\.h)", subline.group(0)); - functions.append(subsubline.group(0)); - -archs = []; -afile = minidom.parse("archs.xml"); -filearchs = afile.getElementsByTagName("arch"); -for filearch in filearchs: - archs.append(str(filearch.attributes["name"].value)); - -for arch in archs: - a_var = re.search("^\$", arch); - if a_var: - archs.remove(arch); - - - -archs_or = "(" -for arch in archs: - archs_or = archs_or + string.upper(arch) + "|"; -archs_or = archs_or[0:len(archs_or)-1]; -archs_or = archs_or + ")"; - - -taglist = []; -fcountlist = []; -arched_arglist = []; -retlist = []; -my_arglist = []; -my_argtypelist = []; -for func in functions: - tags = []; - fcount = []; - infile_source = open(func + ".h"); - begun_name = 0; - begun_paren = 0; - sourcefile = infile_source.readlines(); - infile_source.close(); - for line in sourcefile: - - archline = 
re.search("^\#if.*?LV_HAVE_" + archs_or + ".*", line); - if archline: - arch = archline.group(0); - archline = re.findall(archs_or + "(?=( |\n|&))", line); - if archline: - archsublist = []; - for tup in archline: - archsublist.append(tup[0]); - fcount.append(archsublist); - testline = re.search("static inline.*?" + func, line); - if (not testline): - continue - tagline = re.search(func + "_.+", line); - if tagline: - tag = re.search("(?<=" + func + "_)\w+(?= *\()",line); - if tag: - tag = re.search("\w+", tag.group(0)); - if tag: - tags.append(tag.group(0)); - - - if begun_name == 0: - retline = re.search(".+(?=" + func + ")", line); - if retline: - ret = retline.group(0); - - - - - subline = re.search(func + ".*", line); - if subline: - subsubline = re.search("\(.*?\)", subline.group(0)); - if subsubline: - args = subsubline.group(0); - - else: - begun_name = 1; - subsubline = re.search("\(.*", subline.group(0)); - if subsubline: - args = subsubline.group(0); - begun_paren = 1; - else: - if begun_paren == 1: - subline = re.search(".*?\)", line); - if subline: - args = args + subline.group(0); - begun_name = 0; - begun_paren = 0; - else: - subline = re.search(".*", line); - args = args + subline.group(0); - else: - subline = re.search("\(.*?\)", line); - if subline: - args = subline.group(0); - begun_name = 0; - else: - subline = re.search("\(.*", line); - if subline: - args = subline.group(0); - begun_paren = 1; - - replace = re.compile("static "); - ret = replace.sub("", ret); - replace = re.compile("inline "); - ret = replace.sub("", ret); - replace = re.compile("\)"); - arched_args = replace.sub(", const char* arch) {", args); - - remove = re.compile('\)|\(|{'); - rargs = remove.sub("", args); - sargs = rargs.split(','); - - - - margs = []; - atypes = []; - for arg in sargs: - temp = arg.split(" "); - margs.append(temp[-1]); - replace = re.compile(" " + temp[-1]); - atypes.append(replace.sub("", arg)); - - - my_args = "" - arg_types = "" - for arg in range(0, 
len(margs) - 1): - this_arg = leading_space_remove.sub("", margs[arg]); - my_args = my_args + this_arg + ", "; - this_type = leading_space_remove.sub("", atypes[arg]); - arg_types = arg_types + this_type + ", "; - - this_arg = leading_space_remove.sub("", margs[-1]); - my_args = my_args + this_arg; - this_type = leading_space_remove.sub("", atypes[-1]); - arg_types = arg_types + this_type; - my_argtypelist.append(arg_types); - - if(ret[-1] != ' '): - ret = ret + ' '; - - arched_arglist.append(arched_args); #!!!!!!!!!!! - my_arglist.append(my_args) #!!!!!!!!!!!!!!!!! - retlist.append(ret); - fcountlist.append(fcount); - taglist.append(tags); - -outfile_mktables.write(make_mktables(functions)); -outfile_mktables.close(); - - -outfile_cpu_h.write(make_cpuid_h(filearchs)); -outfile_cpu_h.close(); - -outfile_cpu_x86_c.write(make_cpuid_x86_c(filearchs)); -outfile_cpu_x86_c.close(); - -outfile_proccpu_sim.write(make_proccpu_sim(filearchs)); -outfile_proccpu_sim.close(); - -outfile_set_simd.write(make_set_simd(filearchs)); -outfile_set_simd.close(); - -outfile_cpu_generic_c.write(make_cpuid_generic_c(filearchs)); -outfile_cpu_generic_c.close(); - -outfile_cpu_powerpc_c.write(make_cpuid_powerpc_c(filearchs)); -outfile_cpu_powerpc_c.close(); - -outfile_config_in.write(make_config_in(filearchs)); -outfile_config_in.close(); - -outfile_reg.write(make_registry(filearchs, functions, fcountlist)); -outfile_reg.close(); - -outfile_h.write(make_h(functions, arched_arglist, retlist)); -outfile_h.close(); - -outfile_init_h.write(make_init_h(functions, arched_arglist, retlist)); -outfile_init_h.close(); - -outfile_config_fixed.write(make_config_fixed(filearchs)); -outfile_config_fixed.close(); - -outfile_c.write( make_c(functions, taglist, arched_arglist, retlist, my_arglist, fcountlist)); -outfile_c.close(); - -outfile_runtime_c.write(make_runtime_c(functions, taglist, arched_arglist, retlist, my_arglist, fcountlist)); -outfile_runtime_c.close(); - 
-outfile_init_c.write(make_init_c(functions, filearchs)); -outfile_init_c.close(); - -outfile_runtime.write(make_runtime(functions)); -outfile_runtime.close(); - -outfile_typedefs.write(make_typedefs(functions, retlist, my_argtypelist)); -outfile_typedefs.close(); - -outfile_environment_c.write(make_environment_init_c(filearchs)); -outfile_environment_c.close(); - -outfile_environment_h.write(make_environment_init_h()); -outfile_environment_h.close(); |