From 239144659b29c0a5ecd83a34e0e57387a1060ed7 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Dec 2010 18:50:28 -0500 Subject: Initial checkin for VOLK - Vector-Optimized Library of Kernels. This is a new SIMD library. It currently stands by itself under the GNU Radio tree and can be used separately. We will integrate the build process into GNU Radio and start building off of its functionality over time. --- volk/lib/Makefile.am | 361 +++++++++++++++++++++ volk/lib/assembly.h | 67 ++++ volk/lib/cpuid_x86.S | 60 ++++ volk/lib/cpuid_x86_64.S | 54 +++ volk/lib/qa_16s_add_quad_aligned16.cc | 89 +++++ volk/lib/qa_16s_add_quad_aligned16.h | 18 + volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 106 ++++++ volk/lib/qa_16s_branch_4_state_8_aligned16.h | 18 + volk/lib/qa_16s_convert_32f_aligned16.cc | 73 +++++ volk/lib/qa_16s_convert_32f_aligned16.h | 18 + volk/lib/qa_16s_convert_32f_unaligned16.cc | 73 +++++ volk/lib/qa_16s_convert_32f_unaligned16.h | 18 + volk/lib/qa_16s_convert_8s_aligned16.cc | 60 ++++ volk/lib/qa_16s_convert_8s_aligned16.h | 18 + volk/lib/qa_16s_convert_8s_unaligned16.cc | 60 ++++ volk/lib/qa_16s_convert_8s_unaligned16.h | 18 + volk/lib/qa_16s_max_star_aligned16.cc | 65 ++++ volk/lib/qa_16s_max_star_aligned16.h | 18 + volk/lib/qa_16s_max_star_horizontal_aligned16.cc | 79 +++++ volk/lib/qa_16s_max_star_horizontal_aligned16.h | 18 + .../lib/qa_16s_permute_and_scalar_add_aligned16.cc | 78 +++++ volk/lib/qa_16s_permute_and_scalar_add_aligned16.h | 18 + volk/lib/qa_16s_quad_max_star_aligned16.cc | 59 ++++ volk/lib/qa_16s_quad_max_star_aligned16.h | 18 + volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 76 +++++ volk/lib/qa_16sc_deinterleave_16s_aligned16.h | 18 + volk/lib/qa_16sc_deinterleave_32f_aligned16.cc | 63 ++++ volk/lib/qa_16sc_deinterleave_32f_aligned16.h | 18 + .../lib/qa_16sc_deinterleave_real_16s_aligned16.cc | 71 ++++ volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h | 18 + .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 123 +++++++ 
volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h | 18 + volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc | 60 ++++ volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h | 18 + volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 70 ++++ volk/lib/qa_16sc_magnitude_16s_aligned16.h | 18 + volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 70 ++++ volk/lib/qa_16sc_magnitude_32f_aligned16.h | 18 + volk/lib/qa_16u_byteswap_aligned16.cc | 60 ++++ volk/lib/qa_16u_byteswap_aligned16.h | 18 + volk/lib/qa_32f_accumulator_aligned16.cc | 56 ++++ volk/lib/qa_32f_accumulator_aligned16.h | 18 + volk/lib/qa_32f_add_aligned16.cc | 60 ++++ volk/lib/qa_32f_add_aligned16.h | 18 + .../qa_32f_calc_spectral_noise_floor_aligned16.cc | 59 ++++ .../qa_32f_calc_spectral_noise_floor_aligned16.h | 18 + volk/lib/qa_32f_convert_16s_aligned16.cc | 70 ++++ volk/lib/qa_32f_convert_16s_aligned16.h | 18 + volk/lib/qa_32f_convert_16s_unaligned16.cc | 70 ++++ volk/lib/qa_32f_convert_16s_unaligned16.h | 18 + volk/lib/qa_32f_convert_32s_aligned16.cc | 70 ++++ volk/lib/qa_32f_convert_32s_aligned16.h | 18 + volk/lib/qa_32f_convert_32s_unaligned16.cc | 70 ++++ volk/lib/qa_32f_convert_32s_unaligned16.h | 18 + volk/lib/qa_32f_convert_64f_aligned16.cc | 60 ++++ volk/lib/qa_32f_convert_64f_aligned16.h | 18 + volk/lib/qa_32f_convert_64f_unaligned16.cc | 60 ++++ volk/lib/qa_32f_convert_64f_unaligned16.h | 18 + volk/lib/qa_32f_convert_8s_aligned16.cc | 70 ++++ volk/lib/qa_32f_convert_8s_aligned16.h | 18 + volk/lib/qa_32f_convert_8s_unaligned16.cc | 70 ++++ volk/lib/qa_32f_convert_8s_unaligned16.h | 18 + volk/lib/qa_32f_divide_aligned16.cc | 60 ++++ volk/lib/qa_32f_divide_aligned16.h | 18 + volk/lib/qa_32f_dot_prod_aligned16.cc | 183 +++++++++++ volk/lib/qa_32f_dot_prod_aligned16.h | 18 + volk/lib/qa_32f_dot_prod_unaligned16.cc | 190 +++++++++++ volk/lib/qa_32f_dot_prod_unaligned16.h | 18 + volk/lib/qa_32f_fm_detect_aligned16.cc | 60 ++++ volk/lib/qa_32f_fm_detect_aligned16.h | 18 + volk/lib/qa_32f_index_max_aligned16.cc | 103 
++++++ volk/lib/qa_32f_index_max_aligned16.h | 18 + volk/lib/qa_32f_interleave_16sc_aligned16.cc | 75 +++++ volk/lib/qa_32f_interleave_16sc_aligned16.h | 18 + volk/lib/qa_32f_interleave_32fc_aligned16.cc | 62 ++++ volk/lib/qa_32f_interleave_32fc_aligned16.h | 18 + volk/lib/qa_32f_max_aligned16.cc | 60 ++++ volk/lib/qa_32f_max_aligned16.h | 18 + volk/lib/qa_32f_min_aligned16.cc | 60 ++++ volk/lib/qa_32f_min_aligned16.h | 18 + volk/lib/qa_32f_multiply_aligned16.cc | 60 ++++ volk/lib/qa_32f_multiply_aligned16.h | 18 + volk/lib/qa_32f_normalize_aligned16.cc | 65 ++++ volk/lib/qa_32f_normalize_aligned16.h | 18 + volk/lib/qa_32f_power_aligned16.cc | 95 ++++++ volk/lib/qa_32f_power_aligned16.h | 18 + volk/lib/qa_32f_sqrt_aligned16.cc | 59 ++++ volk/lib/qa_32f_sqrt_aligned16.h | 18 + volk/lib/qa_32f_stddev_aligned16.cc | 74 +++++ volk/lib/qa_32f_stddev_aligned16.h | 18 + volk/lib/qa_32f_stddev_and_mean_aligned16.cc | 75 +++++ volk/lib/qa_32f_stddev_and_mean_aligned16.h | 18 + volk/lib/qa_32f_subtract_aligned16.cc | 60 ++++ volk/lib/qa_32f_subtract_aligned16.h | 18 + volk/lib/qa_32f_sum_of_poly_aligned16.cc | 142 ++++++++ volk/lib/qa_32f_sum_of_poly_aligned16.h | 18 + volk/lib/qa_32fc_32f_multiply_aligned16.cc | 85 +++++ volk/lib/qa_32fc_32f_multiply_aligned16.h | 18 + volk/lib/qa_32fc_32f_power_32fc_aligned16.cc | 83 +++++ volk/lib/qa_32fc_32f_power_32fc_aligned16.h | 18 + volk/lib/qa_32fc_atan2_32f_aligned16.cc | 75 +++++ volk/lib/qa_32fc_atan2_32f_aligned16.h | 18 + volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc | 137 ++++++++ volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h | 18 + volk/lib/qa_32fc_deinterleave_32f_aligned16.cc | 63 ++++ volk/lib/qa_32fc_deinterleave_32f_aligned16.h | 18 + volk/lib/qa_32fc_deinterleave_64f_aligned16.cc | 63 ++++ volk/lib/qa_32fc_deinterleave_64f_aligned16.h | 18 + .../lib/qa_32fc_deinterleave_real_16s_aligned16.cc | 60 ++++ volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h | 18 + .../lib/qa_32fc_deinterleave_real_32f_aligned16.cc | 60 
++++ volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h | 18 + .../lib/qa_32fc_deinterleave_real_64f_aligned16.cc | 60 ++++ volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h | 18 + volk/lib/qa_32fc_dot_prod_aligned16.cc | 214 ++++++++++++ volk/lib/qa_32fc_dot_prod_aligned16.h | 20 ++ volk/lib/qa_32fc_index_max_aligned16.cc | 89 +++++ volk/lib/qa_32fc_index_max_aligned16.h | 18 + volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 70 ++++ volk/lib/qa_32fc_magnitude_16s_aligned16.h | 18 + volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 70 ++++ volk/lib/qa_32fc_magnitude_32f_aligned16.h | 18 + volk/lib/qa_32fc_multiply_aligned16.cc | 86 +++++ volk/lib/qa_32fc_multiply_aligned16.h | 18 + ...qa_32fc_power_spectral_density_32f_aligned16.cc | 63 ++++ .../qa_32fc_power_spectral_density_32f_aligned16.h | 18 + volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc | 63 ++++ volk/lib/qa_32fc_power_spectrum_32f_aligned16.h | 18 + volk/lib/qa_32fc_square_dist_aligned16.cc | 91 ++++++ volk/lib/qa_32fc_square_dist_aligned16.h | 18 + .../qa_32fc_square_dist_scalar_mult_aligned16.cc | 96 ++++++ .../qa_32fc_square_dist_scalar_mult_aligned16.h | 18 + volk/lib/qa_32s_and_aligned16.cc | 60 ++++ volk/lib/qa_32s_and_aligned16.h | 18 + volk/lib/qa_32s_convert_32f_aligned16.cc | 60 ++++ volk/lib/qa_32s_convert_32f_aligned16.h | 18 + volk/lib/qa_32s_convert_32f_unaligned16.cc | 60 ++++ volk/lib/qa_32s_convert_32f_unaligned16.h | 18 + volk/lib/qa_32s_or_aligned16.cc | 60 ++++ volk/lib/qa_32s_or_aligned16.h | 18 + volk/lib/qa_32u_byteswap_aligned16.cc | 59 ++++ volk/lib/qa_32u_byteswap_aligned16.h | 18 + volk/lib/qa_32u_popcnt_aligned16.cc | 61 ++++ volk/lib/qa_32u_popcnt_aligned16.h | 18 + volk/lib/qa_64f_convert_32f_aligned16.cc | 60 ++++ volk/lib/qa_64f_convert_32f_aligned16.h | 18 + volk/lib/qa_64f_convert_32f_unaligned16.cc | 60 ++++ volk/lib/qa_64f_convert_32f_unaligned16.h | 18 + volk/lib/qa_64f_max_aligned16.cc | 60 ++++ volk/lib/qa_64f_max_aligned16.h | 18 + volk/lib/qa_64f_min_aligned16.cc | 60 
++++ volk/lib/qa_64f_min_aligned16.h | 18 + volk/lib/qa_64u_byteswap_aligned16.cc | 59 ++++ volk/lib/qa_64u_byteswap_aligned16.h | 18 + volk/lib/qa_64u_popcnt_aligned16.cc | 61 ++++ volk/lib/qa_64u_popcnt_aligned16.h | 18 + volk/lib/qa_8s_convert_16s_aligned16.cc | 63 ++++ volk/lib/qa_8s_convert_16s_aligned16.h | 18 + volk/lib/qa_8s_convert_16s_unaligned16.cc | 63 ++++ volk/lib/qa_8s_convert_16s_unaligned16.h | 18 + volk/lib/qa_8s_convert_32f_aligned16.cc | 63 ++++ volk/lib/qa_8s_convert_32f_aligned16.h | 18 + volk/lib/qa_8s_convert_32f_unaligned16.cc | 63 ++++ volk/lib/qa_8s_convert_32f_unaligned16.h | 18 + volk/lib/qa_8sc_deinterleave_16s_aligned16.cc | 67 ++++ volk/lib/qa_8sc_deinterleave_16s_aligned16.h | 18 + volk/lib/qa_8sc_deinterleave_32f_aligned16.cc | 134 ++++++++ volk/lib/qa_8sc_deinterleave_32f_aligned16.h | 18 + volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc | 64 ++++ volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h | 18 + volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 138 ++++++++ volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h | 18 + volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc | 60 ++++ volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h | 18 + .../qa_8sc_multiply_conjugate_16sc_aligned16.cc | 87 +++++ .../lib/qa_8sc_multiply_conjugate_16sc_aligned16.h | 18 + .../qa_8sc_multiply_conjugate_32fc_aligned16.cc | 87 +++++ .../lib/qa_8sc_multiply_conjugate_32fc_aligned16.h | 18 + volk/lib/qa_volk.cc | 211 ++++++++++++ volk/lib/qa_volk.h | 36 ++ volk/lib/test_all.cc | 82 +++++ volk/lib/volk_rank_archs.c | 13 + volk/lib/volk_rank_archs.h | 14 + 183 files changed, 9136 insertions(+) create mode 100644 volk/lib/Makefile.am create mode 100644 volk/lib/assembly.h create mode 100644 volk/lib/cpuid_x86.S create mode 100644 volk/lib/cpuid_x86_64.S create mode 100644 volk/lib/qa_16s_add_quad_aligned16.cc create mode 100644 volk/lib/qa_16s_add_quad_aligned16.h create mode 100644 volk/lib/qa_16s_branch_4_state_8_aligned16.cc create mode 100644 
volk/lib/qa_16s_branch_4_state_8_aligned16.h create mode 100644 volk/lib/qa_16s_convert_32f_aligned16.cc create mode 100644 volk/lib/qa_16s_convert_32f_aligned16.h create mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.cc create mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.h create mode 100644 volk/lib/qa_16s_convert_8s_aligned16.cc create mode 100644 volk/lib/qa_16s_convert_8s_aligned16.h create mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.cc create mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.h create mode 100644 volk/lib/qa_16s_max_star_aligned16.cc create mode 100644 volk/lib/qa_16s_max_star_aligned16.h create mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.cc create mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.h create mode 100644 volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc create mode 100644 volk/lib/qa_16s_permute_and_scalar_add_aligned16.h create mode 100644 volk/lib/qa_16s_quad_max_star_aligned16.cc create mode 100644 volk/lib/qa_16s_quad_max_star_aligned16.h create mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc create mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.h create mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc create mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.h create mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc create mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h create mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc create mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h create mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc create mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h create mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.cc create mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.h create mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.cc create mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.h create mode 100644 
volk/lib/qa_16u_byteswap_aligned16.cc create mode 100644 volk/lib/qa_16u_byteswap_aligned16.h create mode 100644 volk/lib/qa_32f_accumulator_aligned16.cc create mode 100644 volk/lib/qa_32f_accumulator_aligned16.h create mode 100644 volk/lib/qa_32f_add_aligned16.cc create mode 100644 volk/lib/qa_32f_add_aligned16.h create mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc create mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h create mode 100644 volk/lib/qa_32f_convert_16s_aligned16.cc create mode 100644 volk/lib/qa_32f_convert_16s_aligned16.h create mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.cc create mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.h create mode 100644 volk/lib/qa_32f_convert_32s_aligned16.cc create mode 100644 volk/lib/qa_32f_convert_32s_aligned16.h create mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.cc create mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.h create mode 100644 volk/lib/qa_32f_convert_64f_aligned16.cc create mode 100644 volk/lib/qa_32f_convert_64f_aligned16.h create mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.cc create mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.h create mode 100644 volk/lib/qa_32f_convert_8s_aligned16.cc create mode 100644 volk/lib/qa_32f_convert_8s_aligned16.h create mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.cc create mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.h create mode 100644 volk/lib/qa_32f_divide_aligned16.cc create mode 100644 volk/lib/qa_32f_divide_aligned16.h create mode 100644 volk/lib/qa_32f_dot_prod_aligned16.cc create mode 100644 volk/lib/qa_32f_dot_prod_aligned16.h create mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.cc create mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.h create mode 100644 volk/lib/qa_32f_fm_detect_aligned16.cc create mode 100644 volk/lib/qa_32f_fm_detect_aligned16.h create mode 100644 volk/lib/qa_32f_index_max_aligned16.cc create mode 100644 volk/lib/qa_32f_index_max_aligned16.h 
create mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.cc create mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.h create mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.cc create mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.h create mode 100644 volk/lib/qa_32f_max_aligned16.cc create mode 100644 volk/lib/qa_32f_max_aligned16.h create mode 100644 volk/lib/qa_32f_min_aligned16.cc create mode 100644 volk/lib/qa_32f_min_aligned16.h create mode 100644 volk/lib/qa_32f_multiply_aligned16.cc create mode 100644 volk/lib/qa_32f_multiply_aligned16.h create mode 100644 volk/lib/qa_32f_normalize_aligned16.cc create mode 100644 volk/lib/qa_32f_normalize_aligned16.h create mode 100644 volk/lib/qa_32f_power_aligned16.cc create mode 100644 volk/lib/qa_32f_power_aligned16.h create mode 100644 volk/lib/qa_32f_sqrt_aligned16.cc create mode 100644 volk/lib/qa_32f_sqrt_aligned16.h create mode 100644 volk/lib/qa_32f_stddev_aligned16.cc create mode 100644 volk/lib/qa_32f_stddev_aligned16.h create mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.cc create mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.h create mode 100644 volk/lib/qa_32f_subtract_aligned16.cc create mode 100644 volk/lib/qa_32f_subtract_aligned16.h create mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.cc create mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.h create mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.cc create mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.h create mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc create mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.h create mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.cc create mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.h create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h create mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc create mode 100644 
volk/lib/qa_32fc_deinterleave_32f_aligned16.h create mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc create mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.h create mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc create mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h create mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc create mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h create mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc create mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h create mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.cc create mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.h create mode 100644 volk/lib/qa_32fc_index_max_aligned16.cc create mode 100644 volk/lib/qa_32fc_index_max_aligned16.h create mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.cc create mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.h create mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.cc create mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.h create mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc create mode 100644 volk/lib/qa_32fc_multiply_aligned16.h create mode 100644 volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc create mode 100644 volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h create mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc create mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h create mode 100644 volk/lib/qa_32fc_square_dist_aligned16.cc create mode 100644 volk/lib/qa_32fc_square_dist_aligned16.h create mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc create mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h create mode 100644 volk/lib/qa_32s_and_aligned16.cc create mode 100644 volk/lib/qa_32s_and_aligned16.h create mode 100644 volk/lib/qa_32s_convert_32f_aligned16.cc create mode 100644 volk/lib/qa_32s_convert_32f_aligned16.h create mode 100644 
volk/lib/qa_32s_convert_32f_unaligned16.cc create mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.h create mode 100644 volk/lib/qa_32s_or_aligned16.cc create mode 100644 volk/lib/qa_32s_or_aligned16.h create mode 100644 volk/lib/qa_32u_byteswap_aligned16.cc create mode 100644 volk/lib/qa_32u_byteswap_aligned16.h create mode 100644 volk/lib/qa_32u_popcnt_aligned16.cc create mode 100644 volk/lib/qa_32u_popcnt_aligned16.h create mode 100644 volk/lib/qa_64f_convert_32f_aligned16.cc create mode 100644 volk/lib/qa_64f_convert_32f_aligned16.h create mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.cc create mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.h create mode 100644 volk/lib/qa_64f_max_aligned16.cc create mode 100644 volk/lib/qa_64f_max_aligned16.h create mode 100644 volk/lib/qa_64f_min_aligned16.cc create mode 100644 volk/lib/qa_64f_min_aligned16.h create mode 100644 volk/lib/qa_64u_byteswap_aligned16.cc create mode 100644 volk/lib/qa_64u_byteswap_aligned16.h create mode 100644 volk/lib/qa_64u_popcnt_aligned16.cc create mode 100644 volk/lib/qa_64u_popcnt_aligned16.h create mode 100644 volk/lib/qa_8s_convert_16s_aligned16.cc create mode 100644 volk/lib/qa_8s_convert_16s_aligned16.h create mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.cc create mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.h create mode 100644 volk/lib/qa_8s_convert_32f_aligned16.cc create mode 100644 volk/lib/qa_8s_convert_32f_aligned16.h create mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.cc create mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.h create mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc create mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.h create mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc create mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.h create mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc create mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h create mode 100644 
volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc create mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h create mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc create mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h create mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc create mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h create mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc create mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h create mode 100644 volk/lib/qa_volk.cc create mode 100644 volk/lib/qa_volk.h create mode 100644 volk/lib/test_all.cc create mode 100644 volk/lib/volk_rank_archs.c create mode 100644 volk/lib/volk_rank_archs.h (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am new file mode 100644 index 000000000..97eb75680 --- /dev/null +++ b/volk/lib/Makefile.am @@ -0,0 +1,361 @@ +# +# Copyright 2008 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +include $(top_srcdir)/Makefile.common + +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) $(LV_CXXFLAGS) + + +# We build 3 libraries and 1 executable here. 
Two libraries contain +# everything except the libcppunit QA code, and one contains only the +# libcppunit-based QA code. The C++ QA code is especially recommended +# when you have general purpose C or C++ code that may not get +# thoroughly exercised by building and running a GR block. The +# executable runs the QA code at "make check" time. +# +# N.B., If there's a SWIG generated shared library and associated +# python code, it will be contained in ../python, not here. (That +# code is conditionally built depending on the state of the +# --without-python configure option.) However, the .i should be here +# next to the .h that it's based on. + + +# list of programs run by "make check" and "make distcheck" +TESTS = test_all + + +lib_LTLIBRARIES = \ + libvolk.la \ + libvolk_runtime.la \ + libvolk_qa.la + + +# ---------------------------------------------------------------- +# The main library +# ---------------------------------------------------------------- + +universal_runtime_CODE = \ + volk_runtime.c \ + volk_init.c \ + volk_rank_archs.c + +universal_CODE = \ + volk.c \ + volk_environment_init.c + +generic_CODE = \ + volk_cpu_generic.cc + +x86_CODE = \ + volk_cpu_x86.c + +x86_SUBCODE = \ + cpuid_x86.S + +x86_64_SUBCODE = \ + cpuid_x86_64.S + +powerpc_CODE = \ + volk_cpu_powerpc.cc + + +if MD_CPU_generic +libvolk_la_SOURCES = \ + $(generic_CODE) \ + $(universal_CODE) +libvolk_runtime_la_SOURCES = \ + $(generic_CODE) \ + $(universal_runtime_CODE) + +endif + +if MD_CPU_x86 +if MD_SUBCPU_x86_64 +libvolk_la_SOURCES = \ + $(x86_CODE) \ + $(x86_64_SUBCODE) \ + $(universal_CODE) + +libvolk_runtime_la_SOURCES = \ + $(x86_CODE) \ + $(x86_64_SUBCODE) \ + $(universal_runtime_CODE) +else +libvolk_la_SOURCES = \ + $(x86_CODE) \ + $(x86_SUBCODE) \ + $(universal_CODE) + +libvolk_runtime_la_SOURCES = \ + $(x86_CODE) \ + $(x86_SUBCODE) \ + $(universal_runtime_CODE) +endif +endif + + +if MD_CPU_powerpc +libvolk_la_SOURCES = \ + $(powerpc_CODE) \ + $(universal_CODE) + 
+libvolk_runtime_la_SOURCES = \ + $(powerpc_CODE) \ + $(universal_runtime_CODE) +endif + + + +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 + +libvolk_la_LIBADD = + + + +# ---------------------------------------------------------------- +# The QA library. Note libvolk.la in LIBADD +# ---------------------------------------------------------------- +libvolk_qa_la_SOURCES = \ + qa_volk.cc \ + qa_16s_quad_max_star_aligned16.cc \ + qa_32fc_dot_prod_aligned16.cc \ + qa_32fc_square_dist_aligned16.cc \ + qa_32fc_square_dist_scalar_mult_aligned16.cc \ + qa_32f_sum_of_poly_aligned16.cc \ + qa_32fc_index_max_aligned16.cc \ + qa_32f_index_max_aligned16.cc \ + qa_32fc_conjugate_dot_prod_aligned16.cc \ + qa_16s_permute_and_scalar_add_aligned16.cc \ + qa_16s_branch_4_state_8_aligned16.cc \ + qa_16s_max_star_horizontal_aligned16.cc \ + qa_16s_max_star_aligned16.cc \ + qa_16s_add_quad_aligned16.cc \ + qa_32f_add_aligned16.cc \ + qa_32f_subtract_aligned16.cc \ + qa_32f_max_aligned16.cc \ + qa_32f_min_aligned16.cc \ + qa_64f_max_aligned16.cc \ + qa_64f_min_aligned16.cc \ + qa_32s_and_aligned16.cc \ + qa_32s_or_aligned16.cc \ + qa_32f_dot_prod_aligned16.cc \ + qa_32f_dot_prod_unaligned16.cc \ + qa_32f_fm_detect_aligned16.cc \ + qa_32fc_32f_multiply_aligned16.cc \ + qa_32fc_multiply_aligned16.cc \ + qa_32f_divide_aligned16.cc \ + qa_32f_multiply_aligned16.cc \ + qa_32f_sqrt_aligned16.cc \ + qa_8sc_multiply_conjugate_16sc_aligned16.cc \ + qa_8sc_multiply_conjugate_32fc_aligned16.cc \ + qa_32u_popcnt_aligned16.cc \ + qa_64u_popcnt_aligned16.cc \ + qa_64u_byteswap_aligned16.cc \ + qa_8sc_deinterleave_32f_aligned16.cc \ + qa_16sc_deinterleave_32f_aligned16.cc \ + qa_8sc_deinterleave_16s_aligned16.cc \ + qa_32f_interleave_32fc_aligned16.cc \ + qa_16u_byteswap_aligned16.cc \ + qa_16sc_deinterleave_16s_aligned16.cc \ + qa_32fc_deinterleave_real_32f_aligned16.cc \ + qa_32fc_magnitude_32f_aligned16.cc \ + 
qa_32fc_deinterleave_real_64f_aligned16.cc \ + qa_32fc_deinterleave_real_16s_aligned16.cc \ + qa_32fc_magnitude_16s_aligned16.cc \ + qa_32fc_deinterleave_32f_aligned16.cc \ + qa_8sc_deinterleave_real_8s_aligned16.cc \ + qa_32fc_deinterleave_64f_aligned16.cc \ + qa_32f_interleave_16sc_aligned16.cc \ + qa_16sc_deinterleave_real_8s_aligned16.cc \ + qa_16sc_deinterleave_real_32f_aligned16.cc \ + qa_16sc_magnitude_32f_aligned16.cc \ + qa_32u_byteswap_aligned16.cc \ + qa_16sc_deinterleave_real_16s_aligned16.cc \ + qa_8sc_deinterleave_real_32f_aligned16.cc \ + qa_16sc_magnitude_16s_aligned16.cc \ + qa_32f_normalize_aligned16.cc \ + qa_8sc_deinterleave_real_16s_aligned16.cc \ + qa_16s_convert_32f_aligned16.cc \ + qa_16s_convert_32f_unaligned16.cc \ + qa_16s_convert_8s_aligned16.cc \ + qa_16s_convert_8s_unaligned16.cc \ + qa_32f_convert_16s_aligned16.cc \ + qa_32f_convert_16s_unaligned16.cc \ + qa_32f_convert_32s_aligned16.cc \ + qa_32f_convert_32s_unaligned16.cc \ + qa_32f_convert_64f_aligned16.cc \ + qa_32f_convert_64f_unaligned16.cc \ + qa_32f_convert_8s_aligned16.cc \ + qa_32f_convert_8s_unaligned16.cc \ + qa_32s_convert_32f_aligned16.cc \ + qa_32s_convert_32f_unaligned16.cc \ + qa_64f_convert_32f_aligned16.cc \ + qa_64f_convert_32f_unaligned16.cc \ + qa_8s_convert_16s_aligned16.cc \ + qa_8s_convert_16s_unaligned16.cc \ + qa_8s_convert_32f_aligned16.cc \ + qa_8s_convert_32f_unaligned16.cc \ + qa_32fc_32f_power_32fc_aligned16.cc \ + qa_32f_power_aligned16.cc \ + qa_32fc_atan2_32f_aligned16.cc \ + qa_32fc_power_spectral_density_32f_aligned16.cc \ + qa_32fc_power_spectrum_32f_aligned16.cc \ + qa_32f_calc_spectral_noise_floor_aligned16.cc \ + qa_32f_accumulator_aligned16.cc \ + qa_32f_stddev_aligned16.cc \ + qa_32f_stddev_and_mean_aligned16.cc + +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 + +libvolk_qa_la_LIBADD = \ + libvolk.la \ + libvolk_runtime.la \ + $(CPPUNIT_LIBS) + +# ---------------------------------------------------------------- +# headers that 
don't get installed +# ---------------------------------------------------------------- +noinst_HEADERS = \ + volk_init.h \ + qa_volk.h \ + qa_16s_quad_max_star_aligned16.h \ + qa_32fc_dot_prod_aligned16.h \ + qa_32fc_square_dist_aligned16.h \ + qa_32fc_square_dist_scalar_mult_aligned16.h \ + qa_32f_sum_of_poly_aligned16.h \ + qa_32fc_index_max_aligned16.h \ + qa_32f_index_max_aligned16.h \ + qa_32fc_conjugate_dot_prod_aligned16.h \ + qa_16s_permute_and_scalar_add_aligned16.h \ + qa_16s_branch_4_state_8_aligned16.h \ + qa_16s_max_star_horizontal_aligned16.h \ + qa_16s_max_star_aligned16.h \ + qa_16s_add_quad_aligned16.h \ + qa_32f_add_aligned16.h \ + qa_32f_subtract_aligned16.h \ + qa_32f_max_aligned16.h \ + qa_32f_min_aligned16.h \ + qa_64f_max_aligned16.h \ + qa_64f_min_aligned16.h \ + qa_32s_and_aligned16.h \ + qa_32s_or_aligned16.h \ + qa_32f_dot_prod_aligned16.h \ + qa_32f_dot_prod_unaligned16.h \ + qa_32f_fm_detect_aligned16.h \ + qa_32fc_32f_multiply_aligned16.h \ + qa_32fc_multiply_aligned16.h \ + qa_32f_divide_aligned16.h \ + qa_32f_multiply_aligned16.h \ + qa_32f_sqrt_aligned16.h \ + qa_8sc_multiply_conjugate_16sc_aligned16.h \ + qa_8sc_multiply_conjugate_32fc_aligned16.h \ + qa_32u_popcnt_aligned16.h \ + qa_64u_popcnt_aligned16.h \ + qa_64u_byteswap_aligned16.h \ + qa_8sc_deinterleave_32f_aligned16.h \ + qa_16sc_deinterleave_32f_aligned16.h \ + qa_8sc_deinterleave_16s_aligned16.h \ + qa_32f_interleave_32fc_aligned16.h \ + qa_16u_byteswap_aligned16.h \ + qa_16sc_deinterleave_16s_aligned16.h \ + qa_32fc_deinterleave_real_32f_aligned16.h \ + qa_32fc_magnitude_32f_aligned16.h \ + qa_32fc_deinterleave_real_64f_aligned16.h \ + qa_32fc_deinterleave_real_16s_aligned16.h \ + qa_32fc_magnitude_16s_aligned16.h \ + qa_32fc_deinterleave_32f_aligned16.h \ + qa_8sc_deinterleave_real_8s_aligned16.h \ + qa_32fc_deinterleave_64f_aligned16.h \ + qa_32f_interleave_16sc_aligned16.h \ + qa_16sc_deinterleave_real_8s_aligned16.h \ + qa_16sc_deinterleave_real_32f_aligned16.h \ + 
qa_16sc_magnitude_32f_aligned16.h \ + qa_32u_byteswap_aligned16.h \ + qa_16sc_deinterleave_real_16s_aligned16.h \ + qa_8sc_deinterleave_real_32f_aligned16.h \ + qa_16sc_magnitude_16s_aligned16.h \ + qa_32f_normalize_aligned16.h \ + qa_8sc_deinterleave_real_16s_aligned16.h \ + qa_16s_convert_32f_aligned16.h \ + qa_16s_convert_32f_unaligned16.h \ + qa_16s_convert_8s_aligned16.h \ + qa_16s_convert_8s_unaligned16.h \ + qa_32f_convert_16s_aligned16.h \ + qa_32f_convert_16s_unaligned16.h \ + qa_32f_convert_32s_aligned16.h \ + qa_32f_convert_32s_unaligned16.h \ + qa_32f_convert_64f_aligned16.h \ + qa_32f_convert_64f_unaligned16.h \ + qa_32f_convert_8s_aligned16.h \ + qa_32f_convert_8s_unaligned16.h \ + qa_32s_convert_32f_aligned16.h \ + qa_32s_convert_32f_unaligned16.h \ + qa_64f_convert_32f_aligned16.h \ + qa_64f_convert_32f_unaligned16.h \ + qa_8s_convert_16s_aligned16.h \ + qa_8s_convert_16s_unaligned16.h \ + qa_8s_convert_32f_aligned16.h \ + qa_8s_convert_32f_unaligned16.h \ + qa_32fc_32f_power_32fc_aligned16.h \ + qa_32f_power_aligned16.h \ + qa_32fc_atan2_32f_aligned16.h \ + qa_32fc_power_spectral_density_32f_aligned16.h \ + qa_32fc_power_spectrum_32f_aligned16.h \ + qa_32f_calc_spectral_noise_floor_aligned16.h \ + qa_32f_accumulator_aligned16.h \ + qa_32f_stddev_aligned16.h \ + qa_32f_stddev_and_mean_aligned16.h + + +# ---------------------------------------------------------------- +# Our test program +# ---------------------------------------------------------------- +noinst_PROGRAMS = \ + test_all + +test_all_SOURCES = test_all.cc +test_all_LDADD = libvolk_qa.la + + +distclean-local: + rm -f volk.c + rm -f volk_cpu_generic.c + rm -f volk_cpu_powerpc.c + rm -f volk_cpu_x86.c + rm -f volk_init.c + rm -f volk_init.h + rm -f volk_mktables + rm -f volk_mktables.c + rm -f volk_proccpu_sim.c + rm -f volk_runtime.c + rm -f volk_tables.h + rm -f volk_environment_init.c +#SUBDIRS = + +#ifdef BUILD_SSE +#SUBDIRS += sse +#elif BUILD_SPU +#SUBDIRS += spu +#else +#SUBDIRS += 
port +#endif + + diff --git a/volk/lib/assembly.h b/volk/lib/assembly.h new file mode 100644 index 000000000..8a99aa07c --- /dev/null +++ b/volk/lib/assembly.h @@ -0,0 +1,67 @@ +/* -*- c++ -*- */ +/* + * Copyright 2002 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef _ASSEMBLY_H_ +#define _ASSEMBLY_H_ + +#if defined (__APPLE__) && defined (__APPLE_CC__) + +// XCode ignores the .scl and .type functions in XCode 2.2.1 and 2.3, +// but creates an error in XCode 2.4. Just ignore them. 
+ +#define GLOB_SYMB(f) _ ## f + +#define DEF_FUNC_HEAD(f) /* none */ + +#define FUNC_TAIL(f) /* none*/ + +#elif !defined (__ELF__) + +/* + * Too bad, the following define does not work as expected --SF + * #define GLOB_SYMB(f) __USER_LABEL_PREFIX__ ## f + */ +#define GLOB_SYMB(f) _ ## f + +#define DEF_FUNC_HEAD(f) \ + .def GLOB_SYMB(f); .scl 2; .type 32; .endef + +#define FUNC_TAIL(f) /* none */ + + +#else /* !__ELF__ */ + + +#define GLOB_SYMB(f) f + +#define DEF_FUNC_HEAD(f) \ + .type GLOB_SYMB(f),@function \ + +#define FUNC_TAIL(f) \ + .Lfe1: \ + .size GLOB_SYMB(f),.Lfe1-GLOB_SYMB(f) + + +#endif /* !__ELF__ */ + + +#endif /* _ASSEMBLY_H_ */ diff --git a/volk/lib/cpuid_x86.S b/volk/lib/cpuid_x86.S new file mode 100644 index 000000000..4e1a9404f --- /dev/null +++ b/volk/lib/cpuid_x86.S @@ -0,0 +1,60 @@ +# +# Copyright 2003 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. 
+# + +# +# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result +# +# void cpuid_x86 (unsigned int op, unsigned int result[4]); +# + +#include "assembly.h" + +.file "cpuid_x86.S" + .version "01.01" +.text +.globl GLOB_SYMB(cpuid_x86) + DEF_FUNC_HEAD(cpuid_x86) +GLOB_SYMB(cpuid_x86): + pushl %ebp + movl %esp, %ebp + pushl %ebx # must save in PIC mode, holds GOT pointer + pushl %esi + + movl 8(%ebp), %eax # op + movl 12(%ebp), %esi # result + cpuid + movl %eax, 0(%esi) + movl %ebx, 4(%esi) + movl %ecx, 8(%esi) + movl %edx, 12(%esi) + + popl %esi + popl %ebx + popl %ebp + ret + +FUNC_TAIL(cpuid_x86) + .ident "Hand coded cpuid assembly" + + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/volk/lib/cpuid_x86_64.S b/volk/lib/cpuid_x86_64.S new file mode 100644 index 000000000..32b1847cd --- /dev/null +++ b/volk/lib/cpuid_x86_64.S @@ -0,0 +1,54 @@ +# +# Copyright 2003,2005 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. 
+# + +# +# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result +# +# void cpuid_x86 (unsigned int op, unsigned int result[4]); +# + +#include "assembly.h" + +.file "cpuid_x86_64.S" + .version "01.01" +.text +.globl GLOB_SYMB(cpuid_x86) + DEF_FUNC_HEAD(cpuid_x86) +GLOB_SYMB(cpuid_x86): + mov %rbx, %r11 # must save in PIC mode, holds GOT pointer + + mov %rdi, %rax # op + cpuid + movl %eax, 0(%rsi) # result + movl %ebx, 4(%rsi) + movl %ecx, 8(%rsi) + movl %edx, 12(%rsi) + + mov %r11, %rbx + retq + +FUNC_TAIL(cpuid_x86) + .ident "Hand coded cpuid64 assembly" + + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc new file mode 100644 index 000000000..c3005c1be --- /dev/null +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_add_quad_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + + + +void qa_16s_add_quad_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3200; + const int ITERS = 100000; + short input0[vlen] __attribute__ ((aligned (16))); + short input1[vlen] __attribute__ ((aligned (16))); + short input2[vlen] __attribute__ ((aligned (16))); + short input3[vlen] __attribute__ ((aligned (16))); + short input4[vlen] __attribute__ ((aligned (16))); + + short output0[vlen] __attribute__ ((aligned (16))); + short output1[vlen] __attribute__ ((aligned (16))); + short output2[vlen] __attribute__ ((aligned (16))); + short output3[vlen] __attribute__ ((aligned (16))); + short output01[vlen] __attribute__ ((aligned (16))); + short output11[vlen] __attribute__ ((aligned (16))); + short output21[vlen] __attribute__ ((aligned (16))); + short output31[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; + + input0[i] = plus0 - minus0; + input1[i] = plus1 - minus1; + input2[i] = plus2 - minus2; + input3[i] = plus3 - minus3; + input4[i] = plus4 - minus4; + + } + printf("16s_add_quad_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + 
printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]); + CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]); + CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_add_quad_aligned16.h b/volk/lib/qa_16s_add_quad_aligned16.h new file mode 100644 index 000000000..3c1ae978b --- /dev/null +++ b/volk/lib/qa_16s_add_quad_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H +#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H + +#include +#include + +class qa_16s_add_quad_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc new file mode 100644 index 000000000..ba5e8ed93 --- /dev/null +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -0,0 +1,106 @@ +#include +#include +#include +#include + +//test for ssse3 + +#ifndef LV_HAVE_SSSE3 + +void qa_16s_branch_4_state_8_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + +void qa_16s_branch_4_state_8_aligned16::t1() { + const int num_iters = 1000000; + const int vlen = 32; + + static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03}; + static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01}; + static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f}; + static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d}; + static char* permuters[4] = {permute0, permute1, permute2, permute3}; + + unsigned int num_bytes = vlen << 1; + + volk_environment_init(); + clock_t start, end; + double total; + + short target[vlen] __attribute__ ((aligned (16))); + short target2[vlen] __attribute__ ((aligned (16))); + short target3[vlen] __attribute__ ((aligned (16))); + + short src0[vlen] __attribute__ ((aligned (16))); + short permute_indexes[vlen] __attribute__ ((aligned (16))) = { +7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; + short cntl0[vlen] __attribute__ ((aligned (16))) = { + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; + short cntl1[vlen] __attribute__ ((aligned (16))) = { + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; + short cntl2[vlen] __attribute__ ((aligned 
(16))) = { + 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; + short cntl3[vlen] __attribute__ ((aligned (16))) = { + 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; + short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + + + + for(int i = 0; i < vlen; ++i) { + src0[i] = i; + + } + + + printf("16s_branch_4_state_8_aligned\n"); + + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("permute_and_scalar_add_time: %f\n", total); + + + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("branch_4_state_8_time, ssse3: %f\n", total); + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("branch_4_state_8_time, generic: %f\n", total); + + + + for(int i = 0; i < vlen; ++i) { + printf("psa... %d, b4s8... 
%d\n", target[i], target3[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT(target[i] == target2[i]); + CPPUNIT_ASSERT(target[i] == target3[i]); + } +} + + +#endif diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.h b/volk/lib/qa_16s_branch_4_state_8_aligned16.h new file mode 100644 index 000000000..41ab073e0 --- /dev/null +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H +#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H + +#include +#include + +class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc new file mode 100644 index 000000000..7878d4737 --- /dev/null +++ b/volk/lib/qa_16s_convert_32f_aligned16.cc @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_16s_convert_32f_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_16s_convert_32f_aligned16::t1() { + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int16_t input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); + } + printf("16s_convert_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h new file mode 100644 index 000000000..ef813d96f --- /dev/null +++ b/volk/lib/qa_16s_convert_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H +#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H + +#include +#include + +class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc new file mode 100644 index 000000000..8c3121e5c --- /dev/null +++ b/volk/lib/qa_16s_convert_32f_unaligned16.cc @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_16s_convert_32f_unaligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_16s_convert_32f_unaligned16::t1() { + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int16_t input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); + } + printf("16s_convert_32f_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h new file mode 100644 index 000000000..aeb04f770 --- /dev/null +++ b/volk/lib/qa_16s_convert_32f_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H +#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H + +#include +#include + +class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc new file mode 100644 index 000000000..734b7784e --- /dev/null +++ b/volk/lib/qa_16s_convert_8s_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_convert_8s_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16s_convert_8s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int16_t input0[vlen] __attribute__ ((aligned (16))); + + int8_t output_generic[vlen] __attribute__ ((aligned (16))); + int8_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); + } + printf("16s_convert_8s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h new file mode 100644 index 000000000..2e409d0cc --- /dev/null +++ b/volk/lib/qa_16s_convert_8s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H +#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H + +#include +#include + +class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc new file mode 100644 index 000000000..275ab7668 --- /dev/null +++ b/volk/lib/qa_16s_convert_8s_unaligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_convert_8s_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16s_convert_8s_unaligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int16_t input0[vlen] __attribute__ ((aligned (16))); + + int8_t output_generic[vlen] __attribute__ ((aligned (16))); + int8_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); + } + printf("16s_convert_8s_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h new file mode 100644 index 000000000..4b2fe9e42 --- /dev/null +++ b/volk/lib/qa_16s_convert_8s_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H +#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H + +#include +#include + +class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc new file mode 100644 index 000000000..b46b9ae8e --- /dev/null +++ b/volk/lib/qa_16s_max_star_aligned16.cc @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +//test for ssse3 + +#ifndef LV_HAVE_SSSE3 + +void qa_16s_max_star_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + + + +void qa_16s_max_star_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 6400; + const int ITERS = 100000; + short input0[vlen] __attribute__ ((aligned (16))); + short output0[1] __attribute__ ((aligned (16))); + + short output1[1] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; + + short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; + + input0[i] = plus0 - minus0; + + } + printf("16s_max_star_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < 1; ++i) { + + CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h new file mode 100644 index 000000000..119f87c4d --- /dev/null +++ b/volk/lib/qa_16s_max_star_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H +#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H + +#include +#include + +class qa_16s_max_star_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc new file mode 100644 index 000000000..4d44735df --- /dev/null +++ b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +//test for ssse3 + +#ifndef LV_HAVE_SSSE3 + +void qa_16s_max_star_horizontal_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + + +void qa_16s_max_star_horizontal_aligned16::t1() { + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 32; + const int ITERS = 1; + short input0[vlen] __attribute__ ((aligned (16))); + short output0[vlen>>1] __attribute__ ((aligned (16))); + + short output1[vlen>>1] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + short plus0 = ((short) (rand() - (RAND_MAX/2))); + + short minus0 = ((short) (rand() - (RAND_MAX/2))); + + input0[i] = plus0 - minus0; + + } + printf("16s_max_star_horizontal_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic"); + volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic"); + volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + + get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen); + get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); + get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); + /* volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3"); + volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3"); + volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/ + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + + for(int i = 0; i < (vlen >> 1); ++i) { + // printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + + } + for(int i = 0; i < (vlen >> 1); ++i) { + + CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); + } + } + + +#endif + diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h new file mode 100644 index 000000000..9f9757253 --- /dev/null +++ b/volk/lib/qa_16s_max_star_horizontal_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H +#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H + +#include +#include + +class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc new file mode 100644 index 000000000..3c4f5c6cc --- /dev/null +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_permute_and_scalar_add_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16s_permute_and_scalar_add_aligned16::t1() { + const int vlen = 64; + + unsigned int num_bytes = vlen << 1; + + volk_environment_init(); + clock_t start, end; + double total; + + short target[vlen] __attribute__ ((aligned (16))); + short target2[vlen] __attribute__ ((aligned (16))); + short src0[vlen] __attribute__ ((aligned (16))); + short permute_indexes[vlen] __attribute__ ((aligned (16))); + short cntl0[vlen] __attribute__ ((aligned (16))); + short cntl1[vlen] __attribute__ ((aligned (16))); + short cntl2[vlen] __attribute__ ((aligned (16))); + short cntl3[vlen] __attribute__ ((aligned (16))); + short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + + for(int i = 0; i < vlen; ++i) { + src0[i] = i; + permute_indexes[i] = (3 * i)%vlen; + cntl0[i] = 0xff; + cntl1[i] = 0xff * (i%2); + cntl2[i] = 0xff * ((i>>1)%2); + cntl3[i] = 0xff * ((i%4) == 3); + } + + printf("16s_permute_and_scalar_add_aligned\n"); + + start = clock(); + for(int i = 0; i < 100000; ++i) { + volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("generic_time: %f\n", total); + + start = clock(); + for(int i = 0; i < 100000; ++i) { + volk_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("sse2_time: %f\n", total); + + + for(int i = 0; i < vlen; ++i) { + //printf("generic... %d, sse2... 
%d\n", target[i], target2[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT(target[i] == target2[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h new file mode 100644 index 000000000..3643aeef6 --- /dev/null +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H +#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H + +#include +#include + +class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc new file mode 100644 index 000000000..80a220c93 --- /dev/null +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_quad_max_star_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16s_quad_max_star_aligned16::t1() { + const int vlen = 34; + + short input0[vlen] __attribute__ ((aligned (16))); + short input1[vlen] __attribute__ ((aligned (16))); + short input2[vlen] __attribute__ ((aligned (16))); + short input3[vlen] __attribute__ ((aligned (16))); + + short output0[vlen] __attribute__ ((aligned (16))); + short output1[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + short plus0 = (short) (rand() - (RAND_MAX/2)); + short plus1 = (short) (rand() - (RAND_MAX/2)); + short plus2 = (short) (rand() - (RAND_MAX/2)); + short plus3 = (short) (rand() - (RAND_MAX/2)); + + short minus0 = (short) (rand() - (RAND_MAX/2)); + short minus1 = (short) (rand() - (RAND_MAX/2)); + short minus2 = (short) (rand() - (RAND_MAX/2)); + short minus3 = (short) (rand() - (RAND_MAX/2)); + + input0[i] = plus0 - minus0; + input1[i] = plus1 - minus1; + input2[i] = plus2 - minus2; + input3[i] = plus3 - minus3; + } + + volk_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic"); + + volk_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2"); + + printf("16s_quad_max_star_aligned\n"); + for(int i = 0; i < vlen; ++i) { + printf("generic... %d, sse2... 
%d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); + } +} + +#endif diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.h b/volk/lib/qa_16s_quad_max_star_aligned16.h new file mode 100644 index 000000000..51e77081a --- /dev/null +++ b/volk/lib/qa_16s_quad_max_star_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H +#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H + +#include +#include + +class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc new file mode 100644 index 000000000..e700ac72c --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc @@ -0,0 +1,76 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSSE3 + +void qa_16sc_deinterleave_16s_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_generic1[vlen] __attribute__ ((aligned (16))); + int16_t output_sse2[vlen] __attribute__ ((aligned (16))); + int16_t output_sse21[vlen] __attribute__ ((aligned (16))); + int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); + int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32678.0)); + } + printf("16sc_deinterleave_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse21[i]); + + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); + } +} + +#endif diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h new file mode 100644 index 000000000..995ab5b34 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H +#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H + +#include +#include + +class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc new file mode 100644 index 000000000..6ee076998 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_16sc_deinterleave_32f_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_generic1[vlen] __attribute__ ((aligned (16))); + float output_sse2[vlen] __attribute__ ((aligned (16))); + float output_sse21[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); + } + printf("16sc_deinterleave_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h new file mode 100644 index 000000000..fea3b6c2d --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H +#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H + +#include +#include + +class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc new file mode 100644 index 000000000..ca048ea67 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc @@ -0,0 +1,71 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSSE3 + +void qa_16sc_deinterleave_real_16s_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_real_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse2[vlen] __attribute__ ((aligned (16))); + int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32678.0)); + } + printf("16sc_deinterleave_real_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + + for(int i = 0; i < vlen; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + // printf("%d = generic... %d, sse2... %d, ssse3... 
%d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h new file mode 100644 index 000000000..ebb70b97a --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H +#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H + +#include +#include + +class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc new file mode 100644 index 000000000..0f4ba6923 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_1 + +#ifndef LV_HAVE_SSE + +void qa_16sc_deinterleave_real_32f_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_real_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); + } + printf("16sc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + } +} + +#endif /* SSE */ + +#else + +void qa_16sc_deinterleave_real_32f_aligned16::t1() { + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); + } + printf("16sc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); + } +} + +#endif /* SSE4_1 */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h new file mode 100644 index 000000000..e83426473 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H +#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H + +#include +#include + +class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc new file mode 100644 index 000000000..5ab458bc9 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSSE3 + +void qa_16sc_deinterleave_real_8s_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_real_8s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int8_t output_generic[vlen] __attribute__ ((aligned (16))); + int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); + } + printf("16sc_deinterleave_real_8s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + } +} + +#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h new file mode 100644 index 000000000..04e5511e5 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H +#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H + +#include +#include + +class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc new file mode 100644 index 000000000..b14610757 --- /dev/null +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE3 + +void qa_16sc_magnitude_16s_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#else + +void qa_16sc_magnitude_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse[vlen] __attribute__ ((aligned (16))); + int16_t output_sse3[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); + } + printf("16sc_magnitude_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + } +} + +#endif diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h new file mode 100644 index 000000000..4664b70f4 --- /dev/null +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H +#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H + +#include +#include + +class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc new file mode 100644 index 000000000..06dff2fd5 --- /dev/null +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE3 + +void qa_16sc_magnitude_32f_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#else + +void qa_16sc_magnitude_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse3[vlen] __attribute__ ((aligned (16))); + + int16_t* inputLoad = (int16_t*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("16sc_magnitude_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h new file mode 100644 index 000000000..0c25673ea --- /dev/null +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H +#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H + +#include +#include + +class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc new file mode 100644 index 000000000..6b19828a4 --- /dev/null +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_16u_byteswap_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16u_byteswap_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100001; + + uint16_t output0[vlen] __attribute__ ((aligned (16))); + uint16_t output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); + } + memcpy(output01, output0, vlen*sizeof(uint16_t)); + + printf("16u_byteswap_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16u_byteswap_aligned16_manual(output0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h new file mode 100644 index 000000000..e11b23e3f --- /dev/null +++ b/volk/lib/qa_16u_byteswap_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H +#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H + +#include +#include + +class qa_16u_byteswap_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc new file mode 100644 index 000000000..ea637d600 --- /dev/null +++ b/volk/lib/qa_32f_accumulator_aligned16.cc @@ -0,0 +1,56 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_accumulator_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_accumulator_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + float accumulator_generic; + float accumulator_sse; + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_accumulator_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4); +} + +#endif diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h new file mode 100644 index 000000000..0004d3ff0 --- /dev/null +++ b/volk/lib/qa_32f_accumulator_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H +#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H + +#include +#include + +class qa_32f_accumulator_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc new file mode 100644 index 000000000..92f35c7ec --- /dev/null +++ b/volk/lib/qa_32f_add_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_add_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_add_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_add_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h new file mode 100644 index 000000000..58e2a151c --- /dev/null +++ b/volk/lib/qa_32f_add_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H +#define INCLUDED_QA_32F_ADD_ALIGNED16_H + +#include +#include + +class qa_32f_add_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_add_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc new file mode 100644 index 000000000..3c8137004 --- /dev/null +++ b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_calc_spectral_noise_floor_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_calc_spectral_noise_floor_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + float output0[1] __attribute__ ((aligned (16))); + float output01[1] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_calc_spectral_noise_floor_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < 1; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h new file mode 100644 index 000000000..c5dce2c4b --- /dev/null +++ b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H +#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H + +#include +#include + +class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc new file mode 100644 index 000000000..84a4c40c4 --- /dev/null +++ b/volk/lib/qa_32f_convert_16s_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_16s_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse[vlen] __attribute__ ((aligned (16))); + int16_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < vlen; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("%d generic... %d, sse... %d sse2... 
%d\n", i, output_generic[i], output_sse[i], output_sse2[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h new file mode 100644 index 000000000..fce1eb417 --- /dev/null +++ b/volk/lib/qa_32f_convert_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H + +#include +#include + +class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc new file mode 100644 index 000000000..9469daed2 --- /dev/null +++ b/volk/lib/qa_32f_convert_16s_unaligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_16s_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_16s_unaligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse[vlen] __attribute__ ((aligned (16))); + int16_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_16s_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h new file mode 100644 index 000000000..492bc80e6 --- /dev/null +++ b/volk/lib/qa_32f_convert_16s_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H + +#include +#include + +class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc new file mode 100644 index 000000000..ff24c7b0d --- /dev/null +++ b/volk/lib/qa_32f_convert_32s_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_32s_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_32s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + int32_t output_generic[vlen] __attribute__ ((aligned (16))); + int32_t output_sse[vlen] __attribute__ ((aligned (16))); + int32_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_32s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h new file mode 100644 index 000000000..97d854463 --- /dev/null +++ b/volk/lib/qa_32f_convert_32s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H + +#include +#include + +class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc new file mode 100644 index 000000000..e63b17994 --- /dev/null +++ b/volk/lib/qa_32f_convert_32s_unaligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_32s_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_32s_unaligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + int32_t output_generic[vlen] __attribute__ ((aligned (16))); + int32_t output_sse[vlen] __attribute__ ((aligned (16))); + int32_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_32s_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h new file mode 100644 index 000000000..5d662d86d --- /dev/null +++ b/volk/lib/qa_32f_convert_32s_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H + +#include +#include + +class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc new file mode 100644 index 000000000..c546e47de --- /dev/null +++ b/volk/lib/qa_32f_convert_64f_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_64f_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_64f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + double output_generic[vlen] __attribute__ ((aligned (16))); + double output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_64f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h new file mode 100644 index 000000000..41eb3e094 --- /dev/null +++ b/volk/lib/qa_32f_convert_64f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H + +#include +#include + +class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc new file mode 100644 index 000000000..24b51f9af --- /dev/null +++ b/volk/lib/qa_32f_convert_64f_unaligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_64f_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_64f_unaligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + double output_generic[vlen] __attribute__ ((aligned (16))); + double output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_64f_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h new file mode 100644 index 000000000..4b144f033 --- /dev/null +++ b/volk/lib/qa_32f_convert_64f_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H + +#include +#include + +class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc new file mode 100644 index 000000000..a3d4d6567 --- /dev/null +++ b/volk/lib/qa_32f_convert_8s_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_8s_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_8s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + int8_t output_generic[vlen] __attribute__ ((aligned (16))); + int8_t output_sse[vlen] __attribute__ ((aligned (16))); + int8_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_8s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h new file mode 100644 index 000000000..68a523f34 --- /dev/null +++ b/volk/lib/qa_32f_convert_8s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H + +#include +#include + +class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc new file mode 100644 index 000000000..d885fd6bb --- /dev/null +++ b/volk/lib/qa_32f_convert_8s_unaligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32f_convert_8s_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32f_convert_8s_unaligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + int8_t output_generic[vlen] __attribute__ ((aligned (16))); + int8_t output_sse[vlen] __attribute__ ((aligned (16))); + int8_t output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_convert_8s_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); + CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); + } +} + +#endif diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h new file mode 100644 index 000000000..88d4ff42a --- /dev/null +++ b/volk/lib/qa_32f_convert_8s_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H +#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H + +#include +#include + +class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc new file mode 100644 index 000000000..b20999beb --- /dev/null +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_divide_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_divide_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_divide_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h new file mode 100644 index 000000000..79d5ae4b8 --- /dev/null +++ b/volk/lib/qa_32f_divide_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H +#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H + +#include +#include + +class qa_32f_divide_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc new file mode 100644 index 000000000..98c1f2d99 --- /dev/null +++ b/volk/lib/qa_32f_dot_prod_aligned16.cc @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifndef LV_HAVE_SSE4_1 + +#ifdef LV_HAVE_SSE3 +void qa_32f_dot_prod_aligned16::t1() { + const int vlen = 2046; + const int ITER = 100000; + + int i; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + float * input; + float * taps; + + float * result_generic; + float * result_sse; + float * result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); + ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); + + 
random_floats((float*)input, vlen); + random_floats((float*)taps, vlen); + + + printf("32f_dot_prod_aligned16\n"); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); + + for(i = 0; i < ITER; i++){ + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse); + free(result_sse3); + +} +#else +void qa_32f_dot_prod_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#endif /* LV_HAVE_SSE3 */ + +#else + +void qa_32f_dot_prod_aligned16::t1() { + + + volk_runtime_init(); + + const int vlen = 4095; + const int ITER = 100000; + + int i; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + float * input; + float * taps; + + float * result_generic; + float * result_sse; + float * result_sse3; + float * result_sse4_1; + + ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); + + random_floats((float*)input, vlen); + random_floats((float*)taps, vlen); + + printf("32f_dot_prod_aligned16\n"); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + //printf("generic: %f ... sse: %f ... sse3 %f ... 
sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); + for(i =0; i < ITER; i++){ + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse); + free(result_sse3); + free(result_sse4_1); + +} + +#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h new file mode 100644 index 000000000..6931a9e98 --- /dev/null +++ b/volk/lib/qa_32f_dot_prod_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H +#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H + +#include +#include + +class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc new file mode 100644 index 000000000..8e97d4249 --- /dev/null +++ b/volk/lib/qa_32f_dot_prod_unaligned16.cc @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifndef LV_HAVE_SSE4_1 + +#ifdef LV_HAVE_SSE3 +void qa_32f_dot_prod_unaligned16::t1() { + + + volk_runtime_init(); + + const int vlen = 2046; + const int ITER = 100000; + + int i; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + float * input; + float * taps; + + 
float * result_generic; + float * result_sse; + float * result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); + ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); + + random_floats((float*)input, vlen); + random_floats((float*)taps, vlen); + + + printf("32f_dot_prod_unaligned16\n"); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); + + for(i = 0; i < ITER; i++){ + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse); + free(result_sse3); + +} +#else +void qa_32f_dot_prod_unaligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#endif /* LV_HAVE_SSE3 */ + +#else + +void qa_32f_dot_prod_unaligned16::t1() { + + + volk_runtime_init(); + + const int vlen = 4095; + const int ITER = 100000; + + int i; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + float * input; + float * taps; + + float * result_generic; + float * result_sse; + float * result_sse3; + float * result_sse4_1; + + ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float)); + ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); + ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); + + input = &input[1]; // Make sure the buffer is unaligned + taps = &taps[1]; // Make sure the buffer is unaligned + + random_floats((float*)input, vlen); + random_floats((float*)taps, vlen); + + printf("32f_dot_prod_unaligned16\n"); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + start = clock(); + for(i = 0; i < ITER; i++){ + get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen); + } + end = clock(); + total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + //printf("generic: %f ... sse: %f ... sse3 %f ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); + for(i =0; i < ITER; i++){ + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); + } + + free(&input[-1]); + free(&taps[-1]); + free(result_generic); + free(result_sse); + free(result_sse3); + free(result_sse4_1); + +} + +#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h new file mode 100644 index 000000000..e8bad07fe --- /dev/null +++ b/volk/lib/qa_32f_dot_prod_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H +#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H + +#include +#include + +class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc new file mode 100644 index 000000000..ca65add28 --- /dev/null +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_fm_detect_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_fm_detect_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + float input0[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_fm_detect_aligned\n"); + + start = clock(); + float save = 0.1; + for(int count = 0; count < ITERS; ++count) { + volk_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + save = 0.1; + for(int count = 0; count < ITERS; ++count) { + volk_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32f_fm_detect_aligned16.h b/volk/lib/qa_32f_fm_detect_aligned16.h new file mode 100644 index 000000000..a2680c524 --- /dev/null +++ b/volk/lib/qa_32f_fm_detect_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H +#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H + +#include +#include + +class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_index_max_aligned16.cc b/volk/lib/qa_32f_index_max_aligned16.cc new file mode 100644 index 000000000..a1c3d4cd1 --- /dev/null +++ b/volk/lib/qa_32f_index_max_aligned16.cc @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) +#define NUM_ITERS 1000000 +#define VEC_LEN 3097 +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * 32767; + + } +} + + +#ifndef LV_HAVE_SSE + +void qa_32f_index_max_aligned16::t1(){ + printf("sse not available... 
no test performed\n"); +} + +#else + + +void qa_32f_index_max_aligned16::t1(){ + + const int vlen = VEC_LEN; + + + volk_runtime_init(); + + volk_environment_init(); + int ret; + + unsigned int* target_sse4_1; + unsigned int* target_sse; + unsigned int* target_generic; + float* src0 ; + + + unsigned int i_target_sse4_1; + target_sse4_1 = &i_target_sse4_1; + unsigned int i_target_sse; + target_sse = &i_target_sse; + unsigned int i_target_generic; + target_generic = &i_target_generic; + + ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float)); + + random_floats((float*)src0, vlen); + + printf("32f_index_max_aligned16\n"); + + clock_t start, end; + double total; + + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.1 time: %f\n", total); + + + printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]); + CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]); + CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]); + + free(src0); +} + +#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32f_index_max_aligned16.h b/volk/lib/qa_32f_index_max_aligned16.h new file mode 100644 index 000000000..8cadffa47 --- /dev/null +++ b/volk/lib/qa_32f_index_max_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H +#define 
INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H + +#include +#include + +class qa_32f_index_max_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc new file mode 100644 index 000000000..2a937637f --- /dev/null +++ b/volk/lib/qa_32f_interleave_16sc_aligned16.cc @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_32f_interleave_16sc_aligned16::t1() { + printf("sse2 not available... no test performed\n"); +} + +#else + +void qa_32f_interleave_16sc_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + std::complex output_generic[vlen] __attribute__ ((aligned (16))); + std::complex output_sse[vlen] __attribute__ ((aligned (16))); + std::complex output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_interleave_16sc_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + 
printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01); + + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01); + } +} + +#endif diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h new file mode 100644 index 000000000..8d2914817 --- /dev/null +++ b/volk/lib/qa_32f_interleave_16sc_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H +#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H + +#include +#include + +class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc new file mode 100644 index 000000000..c22dd1046 --- /dev/null +++ b/volk/lib/qa_32f_interleave_32fc_aligned16.cc @@ -0,0 +1,62 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_interleave_32fc_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_interleave_32fc_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + std::complex output_generic[vlen] __attribute__ ((aligned (16))); + std::complex output_sse[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_interleave_32fc_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h new file mode 100644 index 000000000..cba518d37 --- /dev/null +++ b/volk/lib/qa_32f_interleave_32fc_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H +#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H + +#include +#include + +class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc new file mode 100644 index 000000000..3ef375176 --- /dev/null +++ b/volk/lib/qa_32f_max_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_max_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_max_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_max_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h new file mode 100644 index 000000000..d535479f4 --- /dev/null +++ b/volk/lib/qa_32f_max_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H +#define INCLUDED_QA_32F_MAX_ALIGNED16_H + +#include +#include + +class qa_32f_max_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_max_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc new file mode 100644 index 000000000..617e18b24 --- /dev/null +++ b/volk/lib/qa_32f_min_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_min_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_min_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_min_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h new file mode 100644 index 000000000..90961ac92 --- /dev/null +++ b/volk/lib/qa_32f_min_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H +#define INCLUDED_QA_32F_MIN_ALIGNED16_H + +#include +#include + +class qa_32f_min_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_min_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc new file mode 100644 index 000000000..c77fe97da --- /dev/null +++ b/volk/lib/qa_32f_multiply_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_multiply_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_multiply_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_multiply_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h new file mode 100644 index 000000000..7032a2ad4 --- /dev/null +++ b/volk/lib/qa_32f_multiply_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H +#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H + +#include +#include + +class qa_32f_multiply_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc new file mode 100644 index 000000000..2954fc3ae --- /dev/null +++ b/volk/lib/qa_32f_normalize_aligned16.cc @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_normalize_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_normalize_aligned16::t1() { + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + const int vlen = 320001; + const int ITERS = 100; + + float* output0; + float* output01; + ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); + ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); + + for(int i = 0; i < vlen; ++i) { + output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + memcpy(output01, output0, vlen*sizeof(float)); + printf("32f_normalize_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + // printf("%e...%e\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); + } + + free(output0); + free(output01); +} + +#endif diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h new file mode 100644 index 000000000..7c421eb82 --- /dev/null +++ b/volk/lib/qa_32f_normalize_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H +#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H + +#include +#include + +class qa_32f_normalize_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc new file mode 100644 index 000000000..1b331daeb --- /dev/null +++ b/volk/lib/qa_32f_power_aligned16.cc @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifdef LV_HAVE_SSE +void qa_32f_power_aligned16::t1() { + + + volk_runtime_init(); + + const int vlen = 2046; + const int ITERS = 10000; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + float* input; + int i; + + float* result_generic; + float* result_sse; + float* result_sse4_1; + + ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&result_sse, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float)); + + 
random_floats((float*)input, vlen); + + const float power = 3; + + printf("32f_power_aligned16\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_power_aligned16_manual(result_generic, input, power, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_power_aligned16_manual(result_sse, input, power, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.1_time: %f\n", total); + + + for(i = 0; i < vlen; i++){ + //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA); + CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA); + } + + free(input); + free(result_generic); + free(result_sse); + +} +#else +void qa_32f_power_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#endif /* LV_HAVE_SSE */ + diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h new file mode 100644 index 000000000..d45df4e56 --- /dev/null +++ b/volk/lib/qa_32f_power_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H +#define INCLUDED_QA_32F_POWER_ALIGNED16_H + +#include +#include + +class qa_32f_power_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_power_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc new file mode 100644 index 000000000..a3e6abc18 --- /dev/null +++ b/volk/lib/qa_32f_sqrt_aligned16.cc @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_sqrt_aligned16::t1() { + printf("sse not available... no test performed\n"); +} + +#else + +void qa_32f_sqrt_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + // No reason to test negative numbers because they result in NaN. 
+ for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand()) / static_cast(RAND_MAX)); + } + printf("32f_sqrt_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h new file mode 100644 index 000000000..e4b99d981 --- /dev/null +++ b/volk/lib/qa_32f_sqrt_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H +#define INCLUDED_QA_32F_SQRT_ALIGNED16_H + +#include +#include + +class qa_32f_sqrt_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc new file mode 100644 index 000000000..c0f22cdea --- /dev/null +++ b/volk/lib/qa_32f_stddev_aligned16.cc @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_stddev_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_stddev_aligned16::t1() { + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + float stddev_generic; + float stddev_sse; + float stddev_sse4_1; + float mean = 0; + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + mean += input0[i]; + } + mean /= static_cast(vlen); + + printf("32f_stddev_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); + +} + +#endif diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h new file mode 100644 index 000000000..7f8d7a5fc --- /dev/null +++ b/volk/lib/qa_32f_stddev_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H +#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H + +#include +#include + +class qa_32f_stddev_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc new file mode 100644 index 000000000..dcad8bcf3 --- /dev/null +++ b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_stddev_and_mean_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_stddev_and_mean_aligned16::t1() { + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + + float stddev_generic; + float stddev_sse; + float stddev_sse4_1; + float mean_generic; + float mean_sse; + float mean_sse4_1; + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_stddev_and_mean_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4); + + CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4); + +} + +#endif diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h new file mode 100644 index 000000000..e08bd249a --- /dev/null +++ b/volk/lib/qa_32f_stddev_and_mean_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H +#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H + +#include +#include + +class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc new file mode 100644 index 000000000..a7e1b5ae3 --- /dev/null +++ b/volk/lib/qa_32f_subtract_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_subtract_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_subtract_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_subtract_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h new file mode 100644 index 000000000..97c14f129 --- /dev/null +++ b/volk/lib/qa_32f_subtract_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H +#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H + +#include +#include + +class qa_32f_subtract_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc new file mode 100644 index 000000000..494776357 --- /dev/null +++ b/volk/lib/qa_32f_sum_of_poly_aligned16.cc @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include +#include + +#define SNR 30.0 +#define CENTER -4.0 +#define CUTOFF -5.595 +#define ERR_DELTA (1e-4) +#define NUM_ITERS 100000 +#define VEC_LEN 64 +static float uniform() { + return ((float) rand() / RAND_MAX); // uniformly (0, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * -SNR/2.0; + + } +} + + +#ifndef LV_HAVE_SSE3 + +void qa_32f_sum_of_poly_aligned16::t1(){ + printf("sse3 not available... 
no test performed\n"); +} + +#else + + +void qa_32f_sum_of_poly_aligned16::t1(){ + int i = 0; + + volk_environment_init(); + int ret; + + const int vlen = VEC_LEN; + float cutoff = CUTOFF; + + float* center_point_array; + float* target; + float* target_generic; + float* src0 ; + + + ret = posix_memalign((void**)&center_point_array, 16, 24); + ret = posix_memalign((void**)&target, 16, 4); + ret = posix_memalign((void**)&target_generic, 16, 4); + ret = posix_memalign((void**)&src0, 16, (vlen << 2)); + + + random_floats((float*)src0, vlen); + + float a = (float)CENTER; + float etoa = expf(a); + center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 + + (-4.0 * a * a * a)/24.0 + + (3.0 * a * a)/6.0 + + (-2.0 * a)/2.0 + + (1.0)) * etoa; + center_point_array[1] = (//(-10.0 * a * a * a)/120.0 + + (6.0 * a * a)/24.0 + + (-3.0 * a)/6.0 + + (1.0/2.0)) * etoa; + center_point_array[2] = (//(10.0 * a * a)/120.0 + + (-4.0 * a)/24.0 + + (1.0/6.0)) * etoa; + center_point_array[3] = (//(-5.0 * a)/120.0 + + (1.0/24.0)) * etoa; + //center_point_array[4] = ((1.0)/120.0) * etoa; + center_point_array[4] = (//(a * a * a * a * a)/120.0 + + (a * a * a * a)/24.0 + + (a * a * a)/-6.0 + + (a * a)/2.0 + + -a + 1.0) * etoa; + + printf("32f_sum_of_poly_aligned16\n"); + + clock_t start, end; + double total; + + float my_sum = 0.0; + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + float sum = 0.0; + for(int l = 0; l < vlen; ++l) { + + sum += expf(src0[l]); + + } + my_sum = sum; + } + + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("exp time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + + volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic"); + + } + + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32f_sum_of_poly_aligned16_manual(target, src0, 
center_point_array, &cutoff, vlen << 2, "sse3"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3 approx time: %f\n", total); + + + + printf("exp: %f, sse3: %f\n", my_sum, target[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA); + + + free(center_point_array); + free(target); + free(target_generic); + free(src0); + + +} + +#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h new file mode 100644 index 000000000..67a347f9a --- /dev/null +++ b/volk/lib/qa_32f_sum_of_poly_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H +#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H + +#include +#include + +class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc new file mode 100644 index 000000000..4eba0a3cd --- /dev/null +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifdef LV_HAVE_SSE3 +void qa_32fc_32f_multiply_aligned16::t1() { + + const int vlen = 2046; + const int ITERS = 100000; + 
+ volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + float * taps; + int i; + + std::complex* result_generic; + std::complex* result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); + ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float)); + + random_floats((float*)input, vlen * 2); + random_floats(taps, vlen); + + printf("32fc_32f_multiply_aligned16\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(i = 0; i < vlen; i++){ + assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse3); + +} +#else +void qa_32fc_32f_multiply_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#endif /* LV_HAVE_SSE3 */ + diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h new file mode 100644 index 000000000..fc3b3eeb2 --- /dev/null +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H +#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H + +#include +#include + +class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc new file mode 100644 index 000000000..64ea65da9 --- /dev/null +++ b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1.5e-3) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifdef LV_HAVE_SSE +void qa_32fc_32f_power_32fc_aligned16::t1() { + + const int vlen = 2046; + const int ITERS = 10000; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + int i; + + std::complex* result_generic; + std::complex* result_sse; + + ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); + ret = posix_memalign((void**)&result_sse, 
16, vlen * 2 * sizeof(float)); + + random_floats((float*)input, vlen * 2); + + const float power = 3.2; + + printf("32fc_32f_power_32fc_aligned16\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(i = 0; i < vlen; i++){ + assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA); + } + + free(input); + free(result_generic); + free(result_sse); + +} +#else +void qa_32fc_32f_power_32fc_aligned16::t1() { + printf("sse not available... no test performed\n"); +} + +#endif /* LV_HAVE_SSE */ + diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h new file mode 100644 index 000000000..464b7b7cc --- /dev/null +++ b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H +#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H + +#include +#include + +class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc new file mode 100644 index 000000000..a24382d71 --- /dev/null +++ b/volk/lib/qa_32fc_atan2_32f_aligned16.cc @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32fc_atan2_32f_aligned16::t1() { + printf("sse 
not available... no test performed\n"); +} + +#else + +void qa_32fc_atan2_32f_aligned16::t1() { + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_atan2_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h new file mode 100644 index 000000000..9c4dc209a --- /dev/null +++ b/volk/lib/qa_32fc_atan2_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc new file mode 100644 index 000000000..497914e0a --- /dev/null +++ b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc @@ -0,0 +1,137 @@ +#include +#include +#include +#include + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +//test for sse + +#if LV_HAVE_SSE && LV_HAVE_64 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_conjugate_dot_prod_aligned16::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* 
result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse"); + + printf("32fc_conjugate_dot_prod_aligned16\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#elif LV_HAVE_SSE && LV_HAVE_32 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_conjugate_dot_prod_aligned16::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_conjugate_dot_prod_aligned16_manual(result, 
input, taps, vlen * 8, "sse_32"); + + printf("32fc_conjugate_dot_prod_aligned16\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#else + +void qa_32fc_conjugate_dot_prod_aligned16::t1() { + printf("sse not available... no test performed\n"); +} + +#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h new file mode 100644 index 000000000..507b1769b --- /dev/null +++ b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H +#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H + +#include +#include + +class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc new file mode 100644 index 000000000..0f5a030f5 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32fc_deinterleave_32f_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32fc_deinterleave_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_generic1[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse1[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_deinterleave_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h new file mode 100644 index 000000000..78660e6ad --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc new file mode 100644 index 000000000..6e051afbc --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32fc_deinterleave_64f_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32fc_deinterleave_64f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + double output_generic[vlen] __attribute__ ((aligned (16))); + double output_generic1[vlen] __attribute__ ((aligned (16))); + double output_sse2[vlen] __attribute__ ((aligned (16))); + double output_sse21[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_deinterleave_64f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h new file mode 100644 index 000000000..f924b9752 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H +#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H + +#include +#include + +class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc new file mode 100644 index 000000000..850518524 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32fc_deinterleave_real_16s_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32fc_deinterleave_real_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_deinterleave_real_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h new file mode 100644 index 000000000..68b80f27d --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H +#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H + +#include +#include + +class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc new file mode 100644 index 000000000..321deb184 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32fc_deinterleave_real_32f_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32fc_deinterleave_real_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h new file mode 100644 index 000000000..765450bb6 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc new file mode 100644 index 000000000..aedb2e387 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_32fc_deinterleave_real_64f_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32fc_deinterleave_real_64f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + double output_generic[vlen] __attribute__ ((aligned (16))); + double output_sse2[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_deinterleave_real_64f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h new file mode 100644 index 000000000..3e55fb812 --- /dev/null +++ b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H +#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H + +#include +#include + +class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc new file mode 100644 index 000000000..bcf9ea954 --- /dev/null +++ b/volk/lib/qa_32fc_dot_prod_aligned16.cc @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include +#include + + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + + + +#if LV_HAVE_SSE3 +void qa_32fc_dot_prod_aligned16::t1() { + + const int vlen = 2046; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + 
std::complex* result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result_sse3, 16, 8); + + + result_generic[0] = std::complex(0,0); + result_sse3[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + printf("32fc_dot_prod_aligned16\n"); + + start = clock(); + volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + + start = clock(); + volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3"); + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); + + + assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result_sse3); + +} + +#else +void qa_32fc_dot_prod_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#endif + +#if LV_HAVE_SSE && LV_HAVE_32 +void qa_32fc_dot_prod_aligned16::t2() { + + const int vlen = 2046; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result_sse3, 16, 8); + + + result_generic[0] = std::complex(0,0); + result_sse3[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + printf("32fc_dot_prod_aligned16\n"); + + start = clock(); + volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + + start = clock(); + volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32"); + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_32_time: %f\n", total); + + printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); + + + assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result_sse3); + +} + +#else +void qa_32fc_dot_prod_aligned16::t2() { + printf("sse_32 not available... 
no test performed\n"); +} + +#endif + +#if LV_HAVE_SSE && LV_HAVE_64 + +void qa_32fc_dot_prod_aligned16::t3() { + + const int vlen = 2046; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result_sse3, 16, 8); + + + result_generic[0] = std::complex(0,0); + result_sse3[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + printf("32fc_dot_prod_aligned16\n"); + + start = clock(); + volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + + start = clock(); + volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64"); + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_64_time: %f\n", total); + + printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); + + + assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result_sse3); + +} + +#else +void qa_32fc_dot_prod_aligned16::t3() { + printf("sse_64 not available... 
no test performed\n"); +} + + + +#endif diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h new file mode 100644 index 000000000..4b360db27 --- /dev/null +++ b/volk/lib/qa_32fc_dot_prod_aligned16.h @@ -0,0 +1,20 @@ +#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H +#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H + +#include +#include + +class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); + void t2 (); + void t3 (); +}; + + +#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_index_max_aligned16.cc b/volk/lib/qa_32fc_index_max_aligned16.cc new file mode 100644 index 000000000..4d83f1639 --- /dev/null +++ b/volk/lib/qa_32fc_index_max_aligned16.cc @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) +#define NUM_ITERS 1000000 +#define VEC_LEN 3096 +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * 32767; + + } +} + + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_index_max_aligned16::t1(){ + printf("sse3 not available... 
no test performed\n"); +} + +#else + + +void qa_32fc_index_max_aligned16::t1(){ + + const int vlen = VEC_LEN; + + volk_environment_init(); + int ret; + + unsigned int* target; + unsigned int* target_generic; + std::complex* src0 ; + + + unsigned int i_target; + target = &i_target; + unsigned int i_target_generic; + target_generic = &i_target_generic; + ret = posix_memalign((void**)&src0, 16, vlen << 3); + + random_floats((float*)src0, vlen * 2); + + printf("32fc_index_max_aligned16\n"); + + clock_t start, end; + double total; + + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3 time: %f\n", total); + + + + + printf("generic: %u, sse3: %u\n", target_generic[0], target[0]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1); + + + + free(src0); +} + +#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_index_max_aligned16.h b/volk/lib/qa_32fc_index_max_aligned16.h new file mode 100644 index 000000000..0990bcb1f --- /dev/null +++ b/volk/lib/qa_32fc_index_max_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H +#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H + +#include +#include + +class qa_32fc_index_max_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc new file mode 100644 index 000000000..a4be1616b --- /dev/null +++ 
b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_magnitude_16s_aligned16::t1() { + printf("sse3 not available... no test performed\n"); +} + +#else + +void qa_32fc_magnitude_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse[vlen] __attribute__ ((aligned (16))); + int16_t output_sse3[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_magnitude_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + } +} + +#endif diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h new file mode 100644 index 000000000..ffdf1dd9e --- /dev/null +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H +#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H + +#include +#include + +class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc new file mode 100644 index 000000000..d69ada408 --- /dev/null +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_magnitude_32f_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#else + +void qa_32fc_magnitude_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse3[vlen] __attribute__ ((aligned (16))); + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_magnitude_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + } +} + +#endif diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h new file mode 100644 index 000000000..a2881308c --- /dev/null +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc new file mode 100644 index 000000000..e1f7eab3d --- /dev/null +++ b/volk/lib/qa_32fc_multiply_aligned16.cc @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include + + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-3) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifdef LV_HAVE_SSE3 +void qa_32fc_multiply_aligned16::t1() { + + const int vlen = 2046; + const int ITERS = 100000; + + int i; + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* 
input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + printf("32fc_multiply_aligned16\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(i = 0; i < vlen; i++){ + assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse3); + +} +#else +void qa_32fc_multiply_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#endif /* LV_HAVE_SSE3 */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h new file mode 100644 index 000000000..c8abaa8fe --- /dev/null +++ b/volk/lib/qa_32fc_multiply_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H +#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H + +#include +#include + +class qa_32fc_multiply_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc new file mode 100644 index 000000000..83cdf4b15 --- /dev/null +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include + +//test for sse3 + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_power_spectral_density_32f_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#else + +void qa_32fc_power_spectral_density_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse3[vlen] __attribute__ ((aligned (16))); + + const float scalar = vlen; + const float rbw = 1.7; + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_power_spectral_density_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); + } +} + +#endif diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h new file mode 100644 index 000000000..26f430bec --- /dev/null +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc new file mode 100644 index 000000000..4d1359068 --- /dev/null +++ b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include + +//test for sse3 + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_power_spectrum_32f_aligned16::t1() { + printf("sse3 not available... 
no test performed\n"); +} + +#else + +void qa_32fc_power_spectrum_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse3[vlen] __attribute__ ((aligned (16))); + + const float scalar = vlen; + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + + printf("32fc_power_spectrum_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse33... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); + } +} + +#endif diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h new file mode 100644 index 000000000..d991223f3 --- /dev/null +++ b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc new file mode 100644 index 000000000..d9ead8495 --- /dev/null +++ b/volk/lib/qa_32fc_square_dist_aligned16.cc @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) +#define NUM_ITERS 10000000 +#define VEC_LEN 64 +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * 32767; + + } +} + + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_square_dist_aligned16::t1(){ + printf("sse3 not available... 
no test performed\n"); +} + +#else + + +void qa_32fc_square_dist_aligned16::t1(){ + int i = 0; + + const int vlen = VEC_LEN; + volk_environment_init(); + int ret; + + float* target; + float* target_generic; + std::complex* src0 ; + std::complex* points; + + ret = posix_memalign((void**)&points, 16, vlen << 3); + ret = posix_memalign((void**)&target, 16, vlen << 2); + ret = posix_memalign((void**)&target_generic, 16, vlen << 2); + ret = posix_memalign((void**)&src0, 16, 8); + + random_floats((float*)points, vlen * 2); + random_floats((float*)src0, 2); + + printf("32fc_square_dist_aligned16\n"); + + clock_t start, end; + double total; + + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3 time: %f\n", total); + + + + for(; i < vlen; ++i) { + //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA); + } + + free(target); + free(target_generic); + free(points); + free(src0); +} + +#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h new file mode 100644 index 000000000..9d365d8b0 --- /dev/null +++ b/volk/lib/qa_32fc_square_dist_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H +#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H + +#include +#include + +class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + 
private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc new file mode 100644 index 000000000..f923d1d5c --- /dev/null +++ b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include + +#define ERR_DELTA .0001 +#define NUM_ITERS 10000000 +#define VEC_LEN 64 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * 32767; + + } +} + + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ + printf("sse3 not available... no test performed\n"); +} + +#else + + +void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ + int i = 0; + + const int vlen = VEC_LEN; + + volk_environment_init(); + int ret; + + float* target; + float* target_generic; + std::complex* src0 ; + std::complex* points; + float scalar; + + ret = posix_memalign((void**)&points, 16, vlen << 3); + ret = posix_memalign((void**)&target, 16, vlen << 2); + ret = posix_memalign((void**)&target_generic, 16, vlen << 2); + ret = posix_memalign((void**)&src0, 16, 8); + + random_floats((float*)points, vlen * 2); + random_floats((float*)src0, 2); + random_floats(&scalar, 1); + + printf("32fc_square_dist_scalar_mult_aligned16\n"); + + clock_t start, end; + double total; + + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 
3, "sse3"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3 time: %f\n", total); + + + + for(i = 0; i < vlen; ++i) { + printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA); + } + + free(target); + free(target_generic); + free(points); + free(src0); +} + +#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h new file mode 100644 index 000000000..ac4e3c45b --- /dev/null +++ b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H +#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H + +#include +#include + +class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc new file mode 100644 index 000000000..72d05cf6f --- /dev/null +++ b/volk/lib/qa_32s_and_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32s_and_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32s_and_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int32_t input0[vlen] __attribute__ ((aligned (16))); + int32_t input1[vlen] __attribute__ ((aligned (16))); + + int32_t output0[vlen] __attribute__ ((aligned (16))); + int32_t output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); + input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); + } + printf("32s_and_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h new file mode 100644 index 000000000..dfcb47c63 --- /dev/null +++ b/volk/lib/qa_32s_and_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H +#define INCLUDED_QA_32S_AND_ALIGNED16_H + +#include +#include + +class qa_32s_and_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32s_and_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc new file mode 100644 index 000000000..eab3fe016 --- /dev/null +++ b/volk/lib/qa_32s_convert_32f_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32s_convert_32f_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32s_convert_32f_aligned16::t1() { + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + + int32_t input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); + } + printf("32s_convert_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h new file mode 100644 index 000000000..efd2a2eea --- /dev/null +++ b/volk/lib/qa_32s_convert_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H +#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H + +#include +#include + +class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc new file mode 100644 index 000000000..0e504cfa1 --- /dev/null +++ b/volk/lib/qa_32s_convert_32f_unaligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_32s_convert_32f_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32s_convert_32f_unaligned16::t1() { + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + + int32_t input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); + } + printf("32s_convert_32f_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h new file mode 100644 index 000000000..5006f5fd8 --- /dev/null +++ b/volk/lib/qa_32s_convert_32f_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H +#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H + +#include +#include + +class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc new file mode 100644 index 000000000..e09dfb91c --- /dev/null +++ b/volk/lib/qa_32s_or_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32s_or_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32s_or_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int32_t input0[vlen] __attribute__ ((aligned (16))); + int32_t input1[vlen] __attribute__ ((aligned (16))); + + int32_t output0[vlen] __attribute__ ((aligned (16))); + int32_t output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); + input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); + } + printf("32s_or_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h new file mode 100644 index 000000000..9e949eb52 --- /dev/null +++ b/volk/lib/qa_32s_or_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H +#define INCLUDED_QA_32S_OR_ALIGNED16_H + +#include +#include + +class qa_32s_or_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32s_or_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */ diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc new file mode 100644 index 000000000..8b1023876 --- /dev/null +++ b/volk/lib/qa_32u_byteswap_aligned16.cc @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_32u_byteswap_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_32u_byteswap_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100001; + + uint32_t output0[vlen] __attribute__ ((aligned (16))); + uint32_t output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); + } + memcpy(output01, output0, vlen*sizeof(uint32_t)); + printf("32u_byteswap_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32u_byteswap_aligned16_manual(output0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h new file mode 100644 index 000000000..47bad4c3d --- /dev/null +++ b/volk/lib/qa_32u_byteswap_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H +#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H + +#include +#include + +class qa_32u_byteswap_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc new file mode 100644 index 000000000..49fcddeb2 --- /dev/null +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_2 + +void qa_32u_popcnt_aligned16::t1() { + printf("sse4.2 not available... 
no test performed\n"); +} + +#else + +void qa_32u_popcnt_aligned16::t1() { + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + + const int ITERS = 10000000; + uint32_t input0 __attribute__ ((aligned (16))); + + uint32_t output0 __attribute__ ((aligned (16))); + uint32_t output01 __attribute__ ((aligned (16))); + + input0 = ((uint32_t) (rand() - (RAND_MAX/2))); + output0 = 0; + output01 = 0; + + printf("32u_popcnt_aligned\n"); + + start = clock(); + uint32_t ret = 0; + for(int count = 0; count < ITERS; ++count) { + volk_32u_popcnt_aligned16_manual(&ret, input0, "generic"); + output0 += ret; + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + ret = 0; + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_32u_popcnt_aligned16(&ret, input0); + output01 += ret; + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.2_time: %f\n", total); + + + CPPUNIT_ASSERT_EQUAL(output0, output01); +} + +#endif diff --git a/volk/lib/qa_32u_popcnt_aligned16.h b/volk/lib/qa_32u_popcnt_aligned16.h new file mode 100644 index 000000000..fa1dc1041 --- /dev/null +++ b/volk/lib/qa_32u_popcnt_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32U_POPCNT_ALIGNED16_H +#define INCLUDED_QA_32U_POPCNT_ALIGNED16_H + +#include +#include + +class qa_32u_popcnt_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32u_popcnt_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32U_POPCNT_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc new file mode 100644 index 000000000..0eaebf00a --- /dev/null +++ b/volk/lib/qa_64f_convert_32f_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_64f_convert_32f_aligned16::t1() { 
+ printf("sse2 not available... no test performed\n"); +} + +#else + +void qa_64f_convert_32f_aligned16::t1() { + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + + double input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("64f_convert_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h new file mode 100644 index 000000000..95d79f73d --- /dev/null +++ b/volk/lib/qa_64f_convert_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H +#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H + +#include +#include + +class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc new file mode 100644 index 000000000..dcf94bd27 --- /dev/null +++ b/volk/lib/qa_64f_convert_32f_unaligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_64f_convert_32f_unaligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_64f_convert_32f_unaligned16::t1() { + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + + double input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse2[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("64f_convert_32f_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + } +} + +#endif diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h new file mode 100644 index 000000000..430327e81 --- /dev/null +++ b/volk/lib/qa_64f_convert_32f_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H +#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H + +#include +#include + +class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc new file mode 100644 index 000000000..41ab078b0 --- /dev/null +++ b/volk/lib/qa_64f_max_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_64f_max_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_64f_max_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + double input0[vlen] __attribute__ ((aligned (16))); + double input1[vlen] __attribute__ ((aligned (16))); + + double output0[vlen] __attribute__ ((aligned (16))); + double output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("64f_max_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h new file mode 100644 index 000000000..7cbd4d4c1 --- /dev/null +++ b/volk/lib/qa_64f_max_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H +#define INCLUDED_QA_64F_MAX_ALIGNED16_H + +#include +#include + +class qa_64f_max_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_64f_max_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc new file mode 100644 index 000000000..b4664d065 --- /dev/null +++ b/volk/lib/qa_64f_min_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_64f_min_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_64f_min_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + double input0[vlen] __attribute__ ((aligned (16))); + double input1[vlen] __attribute__ ((aligned (16))); + + double output0[vlen] __attribute__ ((aligned (16))); + double output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("64f_min_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h new file mode 100644 index 000000000..a0e95395f --- /dev/null +++ b/volk/lib/qa_64f_min_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H +#define INCLUDED_QA_64F_MIN_ALIGNED16_H + +#include +#include + +class qa_64f_min_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_64f_min_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc new file mode 100644 index 000000000..4f5d4d02b --- /dev/null +++ b/volk/lib/qa_64u_byteswap_aligned16.cc @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE2 + +void qa_64u_byteswap_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_64u_byteswap_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100001; + + uint64_t output0[vlen] __attribute__ ((aligned (16))); + uint64_t output01[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); + } + memcpy(output01, output0, vlen*sizeof(uint64_t)); + printf("64u_byteswap_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64u_byteswap_aligned16_manual(output0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + } +} + +#endif diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h new file mode 100644 index 000000000..a4fa0c983 --- /dev/null +++ b/volk/lib/qa_64u_byteswap_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H +#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H + +#include +#include + +class qa_64u_byteswap_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc new file mode 100644 index 000000000..bce9ff6c2 --- /dev/null +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_2 + +void qa_64u_popcnt_aligned16::t1() { + printf("sse4.2 not available... 
no test performed\n"); +} + +#else + +void qa_64u_popcnt_aligned16::t1() { + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + + const int ITERS = 10000000; + uint64_t input0 __attribute__ ((aligned (16))); + + uint64_t output0 __attribute__ ((aligned (16))); + uint64_t output01 __attribute__ ((aligned (16))); + + input0 = ((uint64_t) (rand() - (RAND_MAX/2))); + output0 = 0; + output01 = 0; + + printf("64u_popcnt_aligned\n"); + + start = clock(); + uint64_t ret = 0; + for(int count = 0; count < ITERS; ++count) { + volk_64u_popcnt_aligned16_manual(&ret, input0, "generic"); + output0 += ret; + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + ret = 0; + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_64u_popcnt_aligned16(&ret, input0); + output01 += ret; + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.2_time: %f\n", total); + + + CPPUNIT_ASSERT_EQUAL(output0, output01); +} + +#endif diff --git a/volk/lib/qa_64u_popcnt_aligned16.h b/volk/lib/qa_64u_popcnt_aligned16.h new file mode 100644 index 000000000..217822d6e --- /dev/null +++ b/volk/lib/qa_64u_popcnt_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_64U_POPCNT_ALIGNED16_H +#define INCLUDED_QA_64U_POPCNT_ALIGNED16_H + +#include +#include + +class qa_64u_popcnt_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_64u_popcnt_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_64U_POPCNT_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc new file mode 100644 index 000000000..35f08fb81 --- /dev/null +++ b/volk/lib/qa_8s_convert_16s_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +//test for sse4_1 + +#ifndef LV_HAVE_SSE4_1 + +void 
qa_8s_convert_16s_aligned16::t1() { + printf("sse4.1 not available... no test performed\n"); +} + +#else + +void qa_8s_convert_16s_aligned16::t1() { + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int8_t input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); + } + printf("8s_convert_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h new file mode 100644 index 000000000..38739fc96 --- /dev/null +++ b/volk/lib/qa_8s_convert_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H +#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H + +#include +#include + +class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc new file mode 100644 index 000000000..bb326f818 --- /dev/null +++ b/volk/lib/qa_8s_convert_16s_unaligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +//test for sse4_1 + +#ifndef LV_HAVE_SSE4_1 + +void qa_8s_convert_16s_unaligned16::t1() { + printf("sse4.1 not available... 
no test performed\n"); +} + +#else + +void qa_8s_convert_16s_unaligned16::t1() { + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int8_t input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); + } + printf("8s_convert_16s_unaligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h new file mode 100644 index 000000000..d39fffc35 --- /dev/null +++ b/volk/lib/qa_8s_convert_16s_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H +#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H + +#include +#include + +class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc new file mode 100644 index 000000000..522da0b9d --- /dev/null +++ b/volk/lib/qa_8s_convert_32f_aligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +//test for sse4.1 + +#ifndef LV_HAVE_SSE4_1 + +void qa_8s_convert_32f_aligned16::t1() { + printf("sse4_1 not available... 
no test performed\n"); +} + +#else + +void qa_8s_convert_32f_aligned16::t1() { /* Benchmarks the "generic" kernel against the runtime-dispatched one (ITERS timed calls each, scalar 128.0), then requires both float outputs to be exactly equal for all vlen elements. */ + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int8_t input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { /* random input: ratio scaled by 128.0, then converted to int8_t */ + input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); + } + printf("8s_convert_32f_aligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4_1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* exact (not tolerance-based) comparison of the float results */ + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h new file mode 100644 index 000000000..7f8401d42 --- /dev/null +++ b/volk/lib/qa_8s_convert_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H +#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H + +#include +#include + +class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc new file mode 100644 index 000000000..ea1fb7c74 --- /dev/null +++ b/volk/lib/qa_8s_convert_32f_unaligned16.cc @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +//test for sse4.1 + +#ifndef LV_HAVE_SSE4_1 + +void qa_8s_convert_32f_unaligned16::t1() { + printf("sse4_1 not available... 
no test performed\n"); +} + +#else + +void qa_8s_convert_32f_unaligned16::t1() { /* Benchmarks the "generic" kernel against the runtime-dispatched one on input that starts one byte past a 16-byte boundary (ITERS timed calls each, scalar 128.0), then requires both float outputs to be exactly equal for all vlen elements. */ + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + int8_t input0[vlen+1] __attribute__ ((aligned (16))); /* one spare slot so &input0[1] can serve as a misaligned vlen-element input */ + + float output_generic[vlen+1] __attribute__ ((aligned (16))); + float output_sse4_1[vlen+1] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen+1; ++i) { /* fill all vlen+1 slots: both kernels are handed &input0[1] with length vlen, so they read input0[1..vlen] */ + input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); + } + printf("8s_convert_32f_unaligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4_1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* exact (not tolerance-based) comparison of the float results */ + //printf("%e...%e\n", output_generic[i], output_sse4_1[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h new file mode 100644 index 000000000..aad2f8c22 --- /dev/null +++ b/volk/lib/qa_8s_convert_32f_unaligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H +#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H + +#include +#include + +class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc new file mode 100644 index 000000000..823e7fe2e --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_1 + +void qa_8sc_deinterleave_16s_aligned16::t1() { + printf("sse4_1 not available... 
no test performed\n"); +} + +#else + +void qa_8sc_deinterleave_16s_aligned16::t1() { /* Benchmarks the "generic" deinterleave kernel against the runtime-dispatched one (ITERS timed calls each), then requires both pairs of int16_t outputs to match for all vlen elements. */ + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_generic1[vlen] __attribute__ ((aligned (16))); + int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); + int16_t output_sse4_11[vlen] __attribute__ ((aligned (16))); + + int8_t* loadInput = (int8_t*)input0; /* the complex input is filled through a flat 8-bit view, 2*vlen values */ + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + printf("8sc_deinterleave_16s_aligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4.1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* exact element-wise comparison of both output streams */ + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse4_11[i]); + } +} + +#endif diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h new file mode 100644 index 000000000..9c99fed70 --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H +#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H + +#include +#include + +class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc new file mode 100644 index 000000000..fb580516c --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc @@ -0,0 +1,134 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_1 + +#ifndef LV_HAVE_SSE + +void qa_8sc_deinterleave_32f_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_8sc_deinterleave_32f_aligned16::t1() { /* SSE-only variant: benchmarks the "generic" and "sse" implementations (ITERS timed calls each, scalar 128.0), then compares both float output streams within a relative tolerance of 1e-4. */ + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_generic1[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse1[vlen] __attribute__ ((aligned (16))); + + int8_t* loadInput = (int8_t*)input0; /* the complex input is filled through a flat 8-bit view, 2*vlen values */ + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + printf("8sc_deinterleave_32f_aligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); /* time ITERS runs of the implementation selected by the name "sse" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* each stream is checked against a tolerance derived from its own reference value */ + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4); + } +} + +#endif /* LV_HAVE_SSE */ + +#else + +void qa_8sc_deinterleave_32f_aligned16::t1() { /* SSE4.1 variant: benchmarks the "generic", "sse" and runtime-dispatched implementations (ITERS timed calls each, scalar 128.0), then compares every result against the generic one within max(|expected|*1e-4, 1e-4). */ + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_generic1[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse1[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + float output_sse14_1[vlen] __attribute__ ((aligned (16))); + + int8_t* loadInput = (int8_t*)input0; /* the complex input is filled through a flat 8-bit view, 2*vlen values */ + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + printf("8sc_deinterleave_32f_aligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); /* time ITERS runs of the implementation selected by the name "sse" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4.1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen); + } + end = clock(); + total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.1_time: %f\n", total); + + for(int i = 0; i < vlen; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* tolerance uses the magnitude of the matching reference stream, floored at 1e-4 */ + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max(fabs(output_generic[i])*1e-4, 1e-4)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max(fabs(output_generic1[i])*1e-4, 1e-4)); + + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max(fabs(output_generic[i])*1e-4, 1e-4)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max(fabs(output_generic1[i])*1e-4, 1e-4)); + } +} + + +#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h new file mode 100644 index 000000000..63b5fdadb --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H +#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H + +#include +#include + +class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc new file mode 100644 index 000000000..1cc844b52 --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_1 + +void qa_8sc_deinterleave_real_16s_aligned16::t1() { + printf("sse4_1 not 
available... no test performed\n"); +} + +#else + +void qa_8sc_deinterleave_real_16s_aligned16::t1() { /* Benchmarks the "generic" kernel against the runtime-dispatched one (ITERS timed calls each), then requires both int16_t outputs to match for all vlen elements. */ + + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); + + int8_t* loadInput = (int8_t*)input0; /* the complex input is filled through a flat 8-bit view, 2*vlen values */ + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + printf("8sc_deinterleave_real_16s_aligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4.1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* exact element-wise comparison of the two results */ + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); + } +} + +#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h new file mode 100644 index 000000000..02050926f --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H +#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H + +#include +#include + +class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc new file mode 100644 index 000000000..10e537cde --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_1 + +#ifndef LV_HAVE_SSE + +void qa_8sc_deinterleave_real_32f_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_8sc_deinterleave_real_32f_aligned16::t1() { + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + + int8_t* loadInput = (int8_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + printf("8sc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + } +} + +#endif /* LV_HAVE_SSE */ + +#else + +void qa_8sc_deinterleave_real_32f_aligned16::t1() { + + + volk_runtime_init(); + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex *input0; + + float* output_generic; + float* output_sse; + float* output_sse4_1; + + ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t)); + ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float)); + ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float)); + + int8_t* loadInput = (int8_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0); + } + + printf("8sc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", 
input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); + } + + free(input0); + free(output_generic); + free(output_sse); + free(output_sse4_1); +} + +#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h new file mode 100644 index 000000000..93338e488 --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H +#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H + +#include +#include + +class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc new file mode 100644 index 000000000..d84df8119 --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSSE3 + +void qa_8sc_deinterleave_real_8s_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + +void qa_8sc_deinterleave_real_8s_aligned16::t1() { /* Benchmarks the "generic" and "ssse3" implementations (ITERS timed calls each), then requires both int8_t outputs to match for all vlen elements. */ + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + int8_t output_generic[vlen] __attribute__ ((aligned (16))); + int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); + + int8_t* loadInput = (int8_t*)input0; /* the complex input is filled through a flat 8-bit view, 2*vlen values */ + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + printf("8sc_deinterleave_real_8s_aligned\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); /* time ITERS runs of the implementation selected by the name "ssse3" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { /* leftover debug loop; its body is commented out */ + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { /* exact element-wise comparison of the two results */ + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + } +} + +#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h new file mode 100644 index 000000000..92fc0dd4a --- /dev/null +++ b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H +#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H + +#include +#include + +class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc new file mode 100644 index 000000000..d64eac8ce --- /dev/null +++ b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +#ifndef LV_HAVE_SSE4_1 + +void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { + printf("sse4.1 not available... 
no test performed\n"); +} + +#else + +void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { /* Benchmarks the "generic" kernel against the runtime-dispatched one on heap buffers aligned to 16 bytes (ITERS timed calls each), then compares real and imaginary parts of every result via assertcomplexEqual with ERR_DELTA. */ + + + volk_runtime_init(); + + const int vlen = 2046; + const int ITERS = 100000; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result_sse4_1; + int i; + int8_t* inputInt8_T; + int8_t* tapsInt8_T; + + ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); CPPUNIT_ASSERT(ret == 0); /* posix_memalign reports failure through its return value; stop before the buffers are used */ + ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); CPPUNIT_ASSERT(ret == 0); + ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t)); CPPUNIT_ASSERT(ret == 0); + ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t)); CPPUNIT_ASSERT(ret == 0); + + inputInt8_T = (int8_t*)input; /* both complex inputs are filled through flat 8-bit views, 2*vlen values each */ + tapsInt8_T = (int8_t*)taps; + for(int i = 0; i < vlen*2; ++i) { + inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + + printf("8sc_multiply_conjugate_16sc_aligned16\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex*)result_generic, (std::complex*)input, (std::complex*)taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4_1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex*)result_sse4_1, (std::complex*)input, (std::complex*)taps, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(i = 0; i < vlen; i++){ /* compare every complex result of the two implementations */ + //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), 
std::imag(result_sse4_1[i])); + + assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse4_1); + +} + +#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h new file mode 100644 index 000000000..0e78a5eca --- /dev/null +++ b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H +#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H + +#include +#include + +class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc new file mode 100644 index 000000000..c27f0e0ca --- /dev/null +++ b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +#ifndef LV_HAVE_SSE4_1 + +void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { + printf("sse4.1 not available... 
no test performed\n"); +} + +#else + +void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { /* Benchmarks the "generic" kernel against the runtime-dispatched one on heap buffers aligned to 16 bytes (ITERS timed calls each, scalar 32768.0), then compares real and imaginary parts of every result via assertcomplexEqual with ERR_DELTA. */ + + + volk_runtime_init(); + + const int vlen = 2046; + const int ITERS = 100000; + + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result_sse4_1; + int i; + int8_t* inputInt8_T; + int8_t* tapsInt8_T; + + ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); CPPUNIT_ASSERT(ret == 0); /* posix_memalign reports failure through its return value; stop before the buffers are used */ + ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); CPPUNIT_ASSERT(ret == 0); + ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); CPPUNIT_ASSERT(ret == 0); + ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float)); CPPUNIT_ASSERT(ret == 0); + + + inputInt8_T = (int8_t*)input; /* both complex inputs are filled through flat 8-bit views, 2*vlen values each */ + tapsInt8_T = (int8_t*)taps; + for(int i = 0; i < vlen*2; ++i) { + inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); + } + + printf("8sc_multiply_conjugate_32fc_aligned16\n"); + + start = clock(); /* time ITERS runs of the implementation selected by the name "generic" */ + for(int count = 0; count < ITERS; ++count) { + volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex*)input, (const std::complex*)taps, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + + start = clock(); /* time ITERS runs of whatever get_volk_runtime() dispatches to (reported as sse4_1) */ + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex*)input, (const std::complex*)taps, 32768.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(i = 0; i < vlen; i++){ /* compare every complex result of the two implementations */ + //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), 
std::imag(result_sse4_1[i])); + assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse4_1); + +} + +#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h new file mode 100644 index 000000000..eb9ae309c --- /dev/null +++ b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H +#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H + +#include +#include + +class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase { /* CppUnit fixture whose suite registers the single test t1 */ + + CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc new file mode 100644 index 000000000..c3c27b69b --- /dev/null +++ b/volk/lib/qa_volk.cc @@ -0,0 +1,211 @@ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +/* + * This class gathers together all the test cases for the example + * directory into a single test suite. As you create new test cases, + * add them here. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CppUnit::TestSuite * +qa_volk::suite() +{ + CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); + + s->addTest(qa_16s_quad_max_star_aligned16::suite()); + s->addTest(qa_32fc_dot_prod_aligned16::suite()); + s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); + s->addTest(qa_32fc_square_dist_aligned16::suite()); + s->addTest(qa_32f_sum_of_poly_aligned16::suite()); + s->addTest(qa_32fc_index_max_aligned16::suite()); + s->addTest(qa_32f_index_max_aligned16::suite()); + s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite()); + s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite()); + s->addTest(qa_16s_branch_4_state_8_aligned16::suite()); + s->addTest(qa_16s_max_star_horizontal_aligned16::suite()); + s->addTest(qa_16s_max_star_aligned16::suite()); + s->addTest(qa_16s_add_quad_aligned16::suite()); + s->addTest(qa_32f_add_aligned16::suite()); + s->addTest(qa_32f_subtract_aligned16::suite()); + 
s->addTest(qa_32f_max_aligned16::suite()); + s->addTest(qa_32f_min_aligned16::suite()); + s->addTest(qa_64f_max_aligned16::suite()); + s->addTest(qa_64f_min_aligned16::suite()); + s->addTest(qa_32s_and_aligned16::suite()); + s->addTest(qa_32s_or_aligned16::suite()); + s->addTest(qa_32f_dot_prod_aligned16::suite()); + s->addTest(qa_32f_dot_prod_unaligned16::suite()); + s->addTest(qa_32f_fm_detect_aligned16::suite()); + s->addTest(qa_32fc_32f_multiply_aligned16::suite()); + s->addTest(qa_32fc_multiply_aligned16::suite()); + s->addTest(qa_32f_divide_aligned16::suite()); + s->addTest(qa_32f_multiply_aligned16::suite()); + s->addTest(qa_32f_sqrt_aligned16::suite()); + s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite()); + s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite()); + s->addTest(qa_32u_popcnt_aligned16::suite()); + s->addTest(qa_64u_popcnt_aligned16::suite()); + s->addTest(qa_16u_byteswap_aligned16::suite()); + s->addTest(qa_32u_byteswap_aligned16::suite()); + s->addTest(qa_64u_byteswap_aligned16::suite()); + s->addTest(qa_32f_normalize_aligned16::suite()); + s->addTest(qa_16sc_deinterleave_16s_aligned16::suite()); + s->addTest(qa_16sc_deinterleave_32f_aligned16::suite()); + s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite()); + s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite()); + s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite()); + s->addTest(qa_16sc_magnitude_16s_aligned16::suite()); + s->addTest(qa_16sc_magnitude_32f_aligned16::suite()); + s->addTest(qa_32fc_deinterleave_32f_aligned16::suite()); + s->addTest(qa_32fc_deinterleave_64f_aligned16::suite()); + s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite()); + s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite()); + s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite()); + s->addTest(qa_32fc_magnitude_16s_aligned16::suite()); + s->addTest(qa_32fc_magnitude_32f_aligned16::suite()); + s->addTest(qa_32f_interleave_16sc_aligned16::suite()); + 
s->addTest(qa_32f_interleave_32fc_aligned16::suite()); + s->addTest(qa_8sc_deinterleave_16s_aligned16::suite()); + s->addTest(qa_8sc_deinterleave_32f_aligned16::suite()); + s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite()); + s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite()); + s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite()); + s->addTest(qa_16s_convert_32f_aligned16::suite()); + s->addTest(qa_16s_convert_32f_unaligned16::suite()); + s->addTest(qa_16s_convert_8s_aligned16::suite()); + s->addTest(qa_16s_convert_8s_unaligned16::suite()); + s->addTest(qa_32f_convert_16s_aligned16::suite()); + s->addTest(qa_32f_convert_16s_unaligned16::suite()); + s->addTest(qa_32f_convert_32s_aligned16::suite()); + s->addTest(qa_32f_convert_32s_unaligned16::suite()); + s->addTest(qa_32f_convert_64f_aligned16::suite()); + s->addTest(qa_32f_convert_64f_unaligned16::suite()); + s->addTest(qa_32f_convert_8s_aligned16::suite()); + s->addTest(qa_32f_convert_8s_unaligned16::suite()); + s->addTest(qa_32s_convert_32f_aligned16::suite()); + s->addTest(qa_32s_convert_32f_unaligned16::suite()); + s->addTest(qa_64f_convert_32f_aligned16::suite()); + s->addTest(qa_64f_convert_32f_unaligned16::suite()); + s->addTest(qa_8s_convert_16s_aligned16::suite()); + s->addTest(qa_8s_convert_16s_unaligned16::suite()); + s->addTest(qa_8s_convert_32f_aligned16::suite()); + s->addTest(qa_8s_convert_32f_unaligned16::suite()); + s->addTest(qa_32fc_32f_power_32fc_aligned16::suite()); + s->addTest(qa_32f_power_aligned16::suite()); + s->addTest(qa_32fc_atan2_32f_aligned16::suite()); + s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite()); + s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite()); + s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite()); + s->addTest(qa_32f_accumulator_aligned16::suite()); + s->addTest(qa_32f_stddev_aligned16::suite()); + s->addTest(qa_32f_stddev_and_mean_aligned16::suite()); + + return s; +} diff --git a/volk/lib/qa_volk.h 
b/volk/lib/qa_volk.h new file mode 100644 index 000000000..43fa7faba --- /dev/null +++ b/volk/lib/qa_volk.h @@ -0,0 +1,36 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_QA_VOLK_H +#define INCLUDED_QA_VOLK_H + +#include + +//! collect all the tests for the volk library + +class qa_volk { + public: + //! return suite of tests for all of the volk library + static CppUnit::TestSuite *suite (); +}; + +#endif /* INCLUDED_QA_VOLK_H */ diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc new file mode 100644 index 000000000..50ac08eab --- /dev/null +++ b/volk/lib/test_all.cc @@ -0,0 +1,82 @@ +/* -*- c++ -*- */ +/* + * Copyright 2002,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
/*
 * Choose which candidate implementation to use for the given CPU.
 *
 * Layout of arch_defs as read by this function:
 *   arch_defs[0]     number of candidates, n
 *   arch_defs[1..n]  one value per candidate; the candidate with zero-based
 *                    index k is stored at arch_defs[k + 1]
 *
 * arch is treated as a bitmask.  A candidate is eligible only when every bit
 * set in its arch_defs entry is also set in arch.  Among the eligible
 * candidates the one with the largest arch_defs value wins; on a tie the
 * earlier candidate is kept.
 *
 * Returns the zero-based index of the winner.  Candidate 0 is never tested
 * against arch and is returned when nothing else qualifies (presumably the
 * generic implementation -- confirm against the table generator).
 */
unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) {
  unsigned int best_val = 0;
  int i;

  /* Candidate 0 lives at arch_defs[1]; start comparing from candidate 1. */
  for(i = 2; i < arch_defs[0] + 1; ++i) {
    /*
     * Bitwise complement, not logical NOT: "!arch" collapses to 0 or 1, which
     * let every candidate through whenever arch was nonzero.
     */
    if((arch_defs[i] & ~arch) == 0) {
      if(arch_defs[i] > arch_defs[best_val + 1]) {
        best_val = i - 1;
      }
    }
  }
  return best_val;
}
--- volk/lib/Makefile.am | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 97eb75680..54df42d54 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -61,7 +61,7 @@ universal_CODE = \ volk_environment_init.c generic_CODE = \ - volk_cpu_generic.cc + volk_cpu_generic.c x86_CODE = \ volk_cpu_x86.c @@ -73,7 +73,7 @@ x86_64_SUBCODE = \ cpuid_x86_64.S powerpc_CODE = \ - volk_cpu_powerpc.cc + volk_cpu_powerpc.c if MD_CPU_generic @@ -236,6 +236,7 @@ libvolk_qa_la_LIBADD = \ noinst_HEADERS = \ volk_init.h \ qa_volk.h \ + assembly.h \ qa_16s_quad_max_star_aligned16.h \ qa_32fc_dot_prod_aligned16.h \ qa_32fc_square_dist_aligned16.h \ -- cgit From 46d55649012e4fb2838a6f8e9f3c9226ea8b2d50 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 8 Dec 2010 12:19:28 -0500 Subject: volk: Working on VPATH build issues. Makes it through configure, fails on make. --- volk/lib/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 54df42d54..4ee934e8b 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -46,6 +46,7 @@ lib_LTLIBRARIES = \ libvolk_runtime.la \ libvolk_qa.la +EXTRA_DIST = volk_mktables.c # ---------------------------------------------------------------- # The main library -- cgit From 1cc88091470dd4654b6936cda92d81841e135209 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 8 Dec 2010 17:00:38 -0500 Subject: volk: more changes to build system so that VPATH builds properly and project makes distcheck. --- volk/lib/Makefile.am | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 4ee934e8b..7e808695f 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2008 Free Software Foundation, Inc. +# Copyright 2010 Free Software Foundation, Inc. 
# # This file is part of GNU Radio # @@ -20,7 +20,9 @@ include $(top_srcdir)/Makefile.common -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) $(LV_CXXFLAGS) +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ + -I$(top_builddir)/include \ + $(LV_CXXFLAGS) $(WITH_INCLUDES) # We build 2 libraries and 1 executable here. One library contains @@ -46,7 +48,10 @@ lib_LTLIBRARIES = \ libvolk_runtime.la \ libvolk_qa.la -EXTRA_DIST = volk_mktables.c +EXTRA_DIST = \ + volk_mktables.c \ + volk_rank_archs.h \ + volk_proccpu_sim.c # ---------------------------------------------------------------- # The main library -- cgit From a8f33e1b577342fd8149d9308d474871c44c7d52 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 8 Dec 2010 17:26:40 -0500 Subject: Removing autotests of volk during make check and distchecks since they take a long time to run. These can be run by hand by executing volk/lib/test_all Also made a comment about needing a possible fix for this makefile. --- volk/lib/Makefile.am | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 7e808695f..a95860d11 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -20,6 +20,10 @@ include $(top_srcdir)/Makefile.common +#FIXME: forcing the top_builddir for distcheck seems like a bit +# of a hack. Figure out the right way to do this to find built +# volk_config.h and volk_tables.h + AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ -I$(top_builddir)/include \ $(LV_CXXFLAGS) $(WITH_INCLUDES) @@ -40,7 +44,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ # list of programs run by "make check" and "make distcheck" -TESTS = test_all +#TESTS = test_all lib_LTLIBRARIES = \ -- cgit From f3c684751dc3da3a06d5960d8b961739bdf0fd12 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 9 Dec 2010 17:34:29 -0500 Subject: volk: adding generic QA test for 16sc_magnitude_32f. 
--- volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 42 ++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index 06dff2fd5..2c9e48f6e 100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -8,7 +8,47 @@ #ifndef LV_HAVE_SSE3 void qa_16sc_magnitude_32f_aligned16::t1() { - printf("sse3 not available... no test performed\n"); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_known[vlen] __attribute__ ((aligned (16))); + + int16_t* inputLoad = (int16_t*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2)); + } + printf("16sc_magnitude_32f_aligned\n"); + + float scale = 32768.0; + for(int i = 0; i < vlen; ++i) { + float re = (float)(input0[i].real())/scale; + float im = (float)(input0[i].imag())/scale; + output_known[i] = sqrt(re*re + im*im); + } + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + /* + for(int i = 0; i < 100; ++i) { + printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag()); + printf("generic... 
%f == %f\n", output_generic[i], output_known[i]); + } + */ + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); + } } #else -- cgit From 31c85c66f38ed304db06e0696b3df1d2407378c8 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 9 Dec 2010 17:53:05 -0500 Subject: volk: Adding a few more generic-only test cases. --- volk/lib/qa_32f_add_aligned16.cc | 55 ++++++++++++++++++++++++++++++++++- volk/lib/qa_32f_divide_aligned16.cc | 55 ++++++++++++++++++++++++++++++++++- volk/lib/qa_32f_multiply_aligned16.cc | 55 ++++++++++++++++++++++++++++++++++- volk/lib/qa_32f_sqrt_aligned16.cc | 53 +++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc index 92f35c7ec..002aebfc9 100644 --- a/volk/lib/qa_32f_add_aligned16.cc +++ b/volk/lib/qa_32f_add_aligned16.cc @@ -1,3 +1,22 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, see + * . + */ + #include #include #include @@ -8,7 +27,41 @@ #ifndef LV_HAVE_SSE void qa_32f_add_aligned16::t1() { - printf("sse not available... 
no test performed\n"); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output_known[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + output_known[i] = input0[i] + input1[i]; + } + printf("32f_add_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + /* + for(int i = 0; i < 10; ++i) { + printf("inputs: %f, %f\n", input0[i], input1[i]); + printf("generic... %f == %f\n", output0[i], output_known[i]); + } + */ + + for(int i = 0; i < vlen; ++i) { + CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); + } } #else diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index b20999beb..8826bf94f 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -1,3 +1,22 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, see + * . + */ + #include #include #include @@ -8,7 +27,41 @@ #ifndef LV_HAVE_SSE void qa_32f_divide_aligned16::t1() { - printf("sse not available... no test performed\n"); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output_known[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + output_known[i] = input0[i] / input1[i]; + } + printf("32f_divide_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + /* + for(int i = 0; i < 10; ++i) { + printf("inputs: %f, %f\n", input0[i], input1[i]); + printf("generic... %f == %f\n", output0[i], output_known[i]); + } + */ + + for(int i = 0; i < vlen; ++i) { + CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); + } } #else diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc index c77fe97da..e52748466 100644 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32f_multiply_aligned16.cc @@ -1,3 +1,22 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, see + * . + */ + #include #include #include @@ -8,7 +27,41 @@ #ifndef LV_HAVE_SSE void qa_32f_multiply_aligned16::t1() { - printf("sse not available... no test performed\n"); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + float input0[vlen] __attribute__ ((aligned (16))); + float input1[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output_known[vlen] __attribute__ ((aligned (16))); + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + output_known[i] = input0[i] * input1[i]; + } + printf("32f_multiply_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + /* + for(int i = 0; i < 10; ++i) { + printf("inputs: %f, %f\n", input0[i], input1[i]); + printf("generic... %f == %f\n", output0[i], output_known[i]); + } + */ + + for(int i = 0; i < vlen; ++i) { + CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); + } } #else diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc index a3e6abc18..9a5f71de0 100644 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ b/volk/lib/qa_32f_sqrt_aligned16.cc @@ -1,3 +1,22 @@ +/* -*- c++ -*- */ +/* + * Copyright 2010 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, see + * . + */ + #include #include #include @@ -9,6 +28,40 @@ void qa_32f_sqrt_aligned16::t1() { printf("sse not available... no test performed\n"); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + float input0[vlen] __attribute__ ((aligned (16))); + + float output0[vlen] __attribute__ ((aligned (16))); + float output_known[vlen] __attribute__ ((aligned (16))); + + // No reason to test negative numbers because they result in NaN. + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand()) / static_cast(RAND_MAX)); + output_known[i] = sqrt(input0[i]); + } + printf("32f_sqrt_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + /* + for(int i = 0; i < 10; ++i) { + printf("inputs: %f\n", input0[i]); + printf("generic... 
%f == %f\n", output0[i], output_known[i]); + } + */ + + for(int i = 0; i < vlen; ++i) { + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4); + } } #else -- cgit From 8375fd6ca2f6e5edb923abe0d6341b6d4d2d1aae Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Fri, 10 Dec 2010 01:48:17 -0500 Subject: volk: Fixing build system to handle making volk_mktables, volk_tables.h, and volk_config.h instead of a standalone shell script. --- volk/lib/Makefile.am | 1 - 1 file changed, 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index a95860d11..814d438fd 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -353,7 +353,6 @@ distclean-local: rm -f volk_cpu_x86.c rm -f volk_init.c rm -f volk_init.h - rm -f volk_mktables rm -f volk_mktables.c rm -f volk_proccpu_sim.c rm -f volk_runtime.c -- cgit From bef3db60e73953f2d2ecdc6a86a81e11df3b103d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Mon, 13 Dec 2010 19:18:45 -0800 Subject: volk: committed some stuff i neglected --- volk/lib/Makefile.am | 17 +++++++++++------ volk/lib/qa_32f_sqrt_aligned16.cc | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 814d438fd..1291b01cd 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -45,7 +45,9 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ # list of programs run by "make check" and "make distcheck" #TESTS = test_all - +#orc stuff gets built in the ORC directory conditional to ORC being enabled. +#it gets linked in during the build of libvolk as an added library. +#there might be a better way to do this. 
lib_LTLIBRARIES = \ libvolk.la \ @@ -72,6 +74,9 @@ universal_CODE = \ generic_CODE = \ volk_cpu_generic.c + +orc_CODE = \ + volk_cpu_orc.c x86_CODE = \ volk_cpu_x86.c @@ -133,10 +138,9 @@ endif -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 - -libvolk_la_LIBADD = +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 +libvolk_la_LIBADD = ../orc/libvolk_orc.a @@ -233,11 +237,12 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ + ../orc/libvolk_orc.a \ $(CPPUNIT_LIBS) # ---------------------------------------------------------------- diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc index 9a5f71de0..81d66dad7 100644 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ b/volk/lib/qa_32f_sqrt_aligned16.cc @@ -52,6 +52,14 @@ void qa_32f_sqrt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + /* for(int i = 0; i < 10; ++i) { printf("inputs: %f\n", input0[i]); @@ -92,6 +100,13 @@ void qa_32f_sqrt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int 
count = 0; count < ITERS; ++count) { volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); } -- cgit From 611526f9dfba0df4a1a49d47916706438ac194b3 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 01:00:29 -0800 Subject: Volk: Automated more automake for orc. Brought orcc generation in. Shared library libvolk_orc.la. Linking is hackery right now with specified -lorc-0.4 flags; this should change. Otherwise pretty much OK. --- volk/lib/Makefile.am | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 1291b01cd..649d461e0 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -138,9 +138,9 @@ endif -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 -libvolk_la_LIBADD = ../orc/libvolk_orc.a +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_la_LIBADD = ../orc/libvolk_orc.la @@ -237,12 +237,12 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ - ../orc/libvolk_orc.a \ + ../orc/libvolk_orc.la \ $(CPPUNIT_LIBS) # ---------------------------------------------------------------- -- cgit From 05f4bced29987a0a573d1fc5b214f3fa01dc84bd Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 13:36:55 -0800 Subject: Volk: More autotools stuff for Orc. Should build OK with or without Orc now. 
--- volk/lib/Makefile.am | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 649d461e0..385401ae1 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -138,10 +138,13 @@ endif -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +if HAVE_ORC libvolk_la_LIBADD = ../orc/libvolk_orc.la - +libvolk_la_LDFLAGS += -lorc-0.4 +libvolk_runtime_la_LDFLAGS += -lorc-0.4 +endif # ---------------------------------------------------------------- @@ -237,13 +240,18 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ - ../orc/libvolk_orc.la \ $(CPPUNIT_LIBS) + +if HAVE_ORC +libvolk_qa_la_LIBADD += \ + ../orc/libvolk_orc.la + libvolk_qa_la_LDFLAGS += -lorc-0.4 +endif # ---------------------------------------------------------------- # headers that don't get installed -- cgit From d8031649fa3186d7e6b000dcfaa349deacf51262 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 16:41:14 -0800 Subject: Volk: patch via Nick M. 
--- volk/lib/Makefile.am | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 385401ae1..d38004f2a 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -74,9 +74,6 @@ universal_CODE = \ generic_CODE = \ volk_cpu_generic.c - -orc_CODE = \ - volk_cpu_orc.c x86_CODE = \ volk_cpu_x86.c @@ -356,7 +353,7 @@ noinst_PROGRAMS = \ test_all test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk_qa.la +test_all_LDADD = libvolk_qa.la ../orc/libvolk_orc.la distclean-local: -- cgit From 2e9a7d350713b4e1b21458db8f3fce8a557858ae Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 17:13:40 -0800 Subject: Volk: Added QA tests for all the Orc stuff. Added a 16u_byteswap but it's broken right now. --- volk/lib/qa_16u_byteswap_aligned16.cc | 9 +++++++++ volk/lib/qa_32f_add_aligned16.cc | 9 +++++++++ volk/lib/qa_32s_and_aligned16.cc | 9 +++++++++ volk/lib/qa_8s_convert_32f_aligned16.cc | 8 ++++++++ 4 files changed, 35 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc index 6b19828a4..c30b6ba41 100644 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -24,6 +24,7 @@ void qa_16u_byteswap_aligned16::t1() { uint16_t output0[vlen] __attribute__ ((aligned (16))); uint16_t output01[vlen] __attribute__ ((aligned (16))); + uint16_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_16u_byteswap_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16u_byteswap_aligned16_manual(output02, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = 
clock(); for(int count = 0; count < ITERS; ++count) { volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); } @@ -54,6 +62,7 @@ void qa_16u_byteswap_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc index 002aebfc9..d9214e8a2 100644 --- a/volk/lib/qa_32f_add_aligned16.cc +++ b/volk/lib/qa_32f_add_aligned16.cc @@ -78,6 +78,7 @@ void qa_32f_add_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -93,6 +94,13 @@ void qa_32f_add_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -107,6 +115,7 @@ void qa_32f_add_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc index 72d05cf6f..5720ee869 100644 --- a/volk/lib/qa_32s_and_aligned16.cc +++ b/volk/lib/qa_32s_and_aligned16.cc @@ -25,6 +25,7 @@ void qa_32s_and_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t 
output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -40,6 +41,13 @@ void qa_32s_and_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32s_and_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc index 522da0b9d..3b3aa6919 100644 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ b/volk/lib/qa_8s_convert_32f_aligned16.cc @@ -40,6 +40,14 @@ void qa_8s_convert_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { -- cgit From 87a9b14e0b0e2c2d0dcd75d42f2a15211265f102 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 17:44:34 -0800 Subject: Volk: added references to libs instead of specifying them directly --- volk/lib/Makefile.am | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 
d38004f2a..faab4a010 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -133,14 +133,21 @@ libvolk_runtime_la_SOURCES = \ $(universal_runtime_CODE) endif +volk_orc_LDFLAGS = \ + $(ORC_LDFLAGS) \ + -lorc-0.4 + +volk_orc_LIBADD = \ + ../orc/libvolk_orc.la - +if HAVE_ORC +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_la_LIBADD = $(volk_orc_LIBADD) +else libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -if HAVE_ORC -libvolk_la_LIBADD = ../orc/libvolk_orc.la -libvolk_la_LDFLAGS += -lorc-0.4 -libvolk_runtime_la_LDFLAGS += -lorc-0.4 +libvolk_la_LIBADD = endif @@ -243,12 +250,6 @@ libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ $(CPPUNIT_LIBS) - -if HAVE_ORC -libvolk_qa_la_LIBADD += \ - ../orc/libvolk_orc.la - libvolk_qa_la_LDFLAGS += -lorc-0.4 -endif # ---------------------------------------------------------------- # headers that don't get installed @@ -353,7 +354,7 @@ noinst_PROGRAMS = \ test_all test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk_qa.la ../orc/libvolk_orc.la +test_all_LDADD = libvolk_qa.la distclean-local: -- cgit From 21426265324c883c91eeaaf75a81f2ccdc6e249d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 21:12:49 -0800 Subject: Volk: Build fixes to work with/without Orc. 
--- volk/lib/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index faab4a010..253033461 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -140,7 +140,7 @@ volk_orc_LDFLAGS = \ volk_orc_LIBADD = \ ../orc/libvolk_orc.la -if HAVE_ORC +if LV_HAVE_ORC libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) libvolk_la_LIBADD = $(volk_orc_LIBADD) -- cgit From f9ee6a55cb397f9302769a25a8c959fa162354f0 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 22:58:33 -0800 Subject: Volk: Some new basic Orc implementations with QA code --- volk/lib/qa_16u_byteswap_aligned16.cc | 1 + volk/lib/qa_32f_divide_aligned16.cc | 10 ++++++++++ volk/lib/qa_32f_multiply_aligned16.cc | 9 +++++++++ volk/lib/qa_32f_subtract_aligned16.cc | 9 +++++++++ 4 files changed, 29 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc index c30b6ba41..b740f91df 100644 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -30,6 +30,7 @@ void qa_16u_byteswap_aligned16::t1() { output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); } memcpy(output01, output0, vlen*sizeof(uint16_t)); + memcpy(output02, output0, vlen*sizeof(uint16_t)); printf("16u_byteswap_aligned\n"); diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index 8826bf94f..f104e0443 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -35,6 +35,7 @@ void qa_32f_divide_aligned16::t1() { float input1[vlen] __attribute__ ((aligned (16))); float output0[vlen] __attribute__ ((aligned (16))); + float output1[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { 
@@ -51,6 +52,14 @@ void qa_32f_divide_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 10; ++i) { @@ -61,6 +70,7 @@ void qa_32f_divide_aligned16::t1() { for(int i = 0; i < vlen; ++i) { CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); + CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]); } } diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc index e52748466..f9c034d70 100644 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32f_multiply_aligned16.cc @@ -78,6 +78,7 @@ void qa_32f_multiply_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -93,6 +94,13 @@ void qa_32f_multiply_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -107,6 +115,7 @@ void qa_32f_multiply_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git 
a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc index a7e1b5ae3..5a5a7c9b6 100644 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ b/volk/lib/qa_32f_subtract_aligned16.cc @@ -25,6 +25,7 @@ void qa_32f_subtract_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_32f_subtract_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32f_subtract_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } -- cgit From 15ad4b5398e474bfb52fdb7e826b69f3e398c0b0 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 15 Dec 2010 16:27:42 -0800 Subject: Volk: A bunch of new ORC routines plus tests. Also fixed a typo in the generic version of 16sc_magnitude_16s_a16. 
--- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 9 +++++++++ volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 20 ++++++++++++++++++++ volk/lib/qa_32f_divide_aligned16.cc | 9 +++++++++ volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 9 +++++++++ volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 9 +++++++++ volk/lib/qa_32s_or_aligned16.cc | 9 +++++++++ 6 files changed, 65 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index b14610757..c8f13ff84 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -23,6 +23,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -40,6 +41,13 @@ void qa_16sc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); } @@ -64,6 +72,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index 2c9e48f6e..e7178863c 
100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -15,6 +15,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); int16_t* inputLoad = (int16_t*)input0; @@ -37,6 +38,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 100; ++i) { @@ -48,6 +57,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4); } } @@ -63,6 +73,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -79,6 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + 
printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); @@ -104,6 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index f104e0443..b2c2ecf9a 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -88,6 +88,7 @@ void qa_32f_divide_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -103,6 +104,13 @@ void qa_32f_divide_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -117,6 +125,7 @@ void qa_32f_divide_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 
a4be1616b..c3e65866b 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -23,6 +23,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { std::complex<float> input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -40,6 +41,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); } @@ -64,6 +72,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc index d69ada408..6a1d46c7a 100644 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc @@ -23,6 +23,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { std::complex<float> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -40,6 +41,13 @@ void 
qa_32fc_magnitude_32f_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); } @@ -64,6 +72,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc index e09dfb91c..9ea5283a6 100644 --- a/volk/lib/qa_32s_or_aligned16.cc +++ b/volk/lib/qa_32s_or_aligned16.cc @@ -25,6 +25,7 @@ void qa_32s_or_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -40,6 +41,13 @@ void qa_32s_or_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32s_or_aligned16::t1() { for(int i = 0; i < 
vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } -- cgit From ce3e4c33d170b65cf288faec7d8da6a496eb6101 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 16 Dec 2010 21:33:54 -0500 Subject: Including time header to qa files. --- volk/lib/qa_16s_add_quad_aligned16.cc | 2 +- volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 2 +- volk/lib/qa_16s_convert_32f_aligned16.cc | 1 + volk/lib/qa_16s_convert_32f_unaligned16.cc | 1 + volk/lib/qa_16s_convert_8s_aligned16.cc | 1 + volk/lib/qa_16s_convert_8s_unaligned16.cc | 1 + volk/lib/qa_16s_max_star_aligned16.cc | 2 +- volk/lib/qa_16s_max_star_horizontal_aligned16.cc | 2 +- volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc | 2 +- volk/lib/qa_16s_quad_max_star_aligned16.cc | 1 + volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 1 + volk/lib/qa_16sc_deinterleave_32f_aligned16.cc | 1 + volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc | 1 + volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 1 + volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc | 1 + volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 1 + volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 1 + volk/lib/qa_16u_byteswap_aligned16.cc | 1 + volk/lib/qa_32f_accumulator_aligned16.cc | 1 + volk/lib/qa_32f_add_aligned16.cc | 1 + volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc | 1 + volk/lib/qa_32f_convert_16s_aligned16.cc | 1 + volk/lib/qa_32f_convert_16s_unaligned16.cc | 1 + volk/lib/qa_32f_convert_32s_aligned16.cc | 1 + volk/lib/qa_32f_convert_32s_unaligned16.cc | 1 + volk/lib/qa_32f_convert_64f_aligned16.cc | 1 + volk/lib/qa_32f_convert_64f_unaligned16.cc | 1 + volk/lib/qa_32f_convert_8s_aligned16.cc | 1 + volk/lib/qa_32f_convert_8s_unaligned16.cc | 1 + volk/lib/qa_32f_divide_aligned16.cc | 1 + volk/lib/qa_32f_fm_detect_aligned16.cc | 1 + volk/lib/qa_32f_interleave_16sc_aligned16.cc | 1 + volk/lib/qa_32f_interleave_32fc_aligned16.cc | 1 + 
volk/lib/qa_32f_max_aligned16.cc | 1 + volk/lib/qa_32f_min_aligned16.cc | 1 + volk/lib/qa_32f_multiply_aligned16.cc | 1 + volk/lib/qa_32f_normalize_aligned16.cc | 1 + volk/lib/qa_32f_sqrt_aligned16.cc | 1 + volk/lib/qa_32f_stddev_aligned16.cc | 1 + volk/lib/qa_32f_stddev_and_mean_aligned16.cc | 1 + volk/lib/qa_32f_subtract_aligned16.cc | 1 + volk/lib/qa_32fc_atan2_32f_aligned16.cc | 1 + volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc | 1 + volk/lib/qa_32fc_deinterleave_32f_aligned16.cc | 1 + volk/lib/qa_32fc_deinterleave_64f_aligned16.cc | 1 + volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc | 1 + volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc | 1 + volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc | 1 + volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 1 + volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 1 + volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc | 1 + volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc | 1 + volk/lib/qa_32s_and_aligned16.cc | 1 + volk/lib/qa_32s_convert_32f_aligned16.cc | 1 + volk/lib/qa_32s_convert_32f_unaligned16.cc | 1 + volk/lib/qa_32s_or_aligned16.cc | 1 + volk/lib/qa_32u_byteswap_aligned16.cc | 1 + volk/lib/qa_32u_popcnt_aligned16.cc | 1 + volk/lib/qa_64f_convert_32f_aligned16.cc | 1 + volk/lib/qa_64f_convert_32f_unaligned16.cc | 1 + volk/lib/qa_64f_max_aligned16.cc | 1 + volk/lib/qa_64f_min_aligned16.cc | 1 + volk/lib/qa_64u_byteswap_aligned16.cc | 1 + volk/lib/qa_64u_popcnt_aligned16.cc | 1 + volk/lib/qa_8s_convert_16s_aligned16.cc | 1 + volk/lib/qa_8s_convert_16s_unaligned16.cc | 1 + volk/lib/qa_8s_convert_32f_aligned16.cc | 1 + volk/lib/qa_8s_convert_32f_unaligned16.cc | 1 + volk/lib/qa_8sc_deinterleave_16s_aligned16.cc | 1 + volk/lib/qa_8sc_deinterleave_32f_aligned16.cc | 1 + volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc | 1 + volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 1 + volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc | 1 + volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc | 2 +- 
volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc | 2 +- 75 files changed, 75 insertions(+), 7 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index c3005c1be..154aa0f17 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -2,7 +2,7 @@ #include #include #include -#include +#include //test for sse2 #ifndef LV_HAVE_SSE2 diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index ba5e8ed93..62deffaeb 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -1,7 +1,7 @@ #include #include #include -#include +#include //test for ssse3 diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc index 7878d4737..6215f4a64 100644 --- a/volk/lib/qa_16s_convert_32f_aligned16.cc +++ b/volk/lib/qa_16s_convert_32f_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc index 8c3121e5c..46c2e48ac 100644 --- a/volk/lib/qa_16s_convert_32f_unaligned16.cc +++ b/volk/lib/qa_16s_convert_32f_unaligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc index 734b7784e..8225aa0cf 100644 --- a/volk/lib/qa_16s_convert_8s_aligned16.cc +++ b/volk/lib/qa_16s_convert_8s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc index 275ab7668..e6ce5030e 100644 --- a/volk/lib/qa_16s_convert_8s_unaligned16.cc +++ b/volk/lib/qa_16s_convert_8s_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git 
a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc index b46b9ae8e..c6f828ba6 100644 --- a/volk/lib/qa_16s_max_star_aligned16.cc +++ b/volk/lib/qa_16s_max_star_aligned16.cc @@ -2,7 +2,7 @@ #include #include #include -#include +#include //test for ssse3 #ifndef LV_HAVE_SSSE3 diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc index 4d44735df..0a58570e2 100644 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc +++ b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc @@ -3,7 +3,7 @@ #include #include #include -#include +#include //test for ssse3 #ifndef LV_HAVE_SSSE3 diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 3c4f5c6cc..819b2256b 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -2,7 +2,7 @@ #include #include #include -#include +#include //test for sse2 diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 80a220c93..66f8c9afa 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc index e700ac72c..c775e8596 100644 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc index 6ee076998..b25094e89 100644 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git 
a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc index ca048ea67..c67064ea6 100644 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc index 0f4ba6923..f86f03b88 100644 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc index 5ab458bc9..dd446567e 100644 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index b14610757..9799ef43b 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index 2c9e48f6e..1ebe644c5 100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc index 6b19828a4..ea117a820 100644 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc index 
ea637d600..0defef283 100644 --- a/volk/lib/qa_32f_accumulator_aligned16.cc +++ b/volk/lib/qa_32f_accumulator_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc index 002aebfc9..f80d562d4 100644 --- a/volk/lib/qa_32f_add_aligned16.cc +++ b/volk/lib/qa_32f_add_aligned16.cc @@ -21,6 +21,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc index 3c8137004..5d6987333 100644 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc +++ b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc index 84a4c40c4..3e2452e68 100644 --- a/volk/lib/qa_32f_convert_16s_aligned16.cc +++ b/volk/lib/qa_32f_convert_16s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc index 9469daed2..e016b7ff7 100644 --- a/volk/lib/qa_32f_convert_16s_unaligned16.cc +++ b/volk/lib/qa_32f_convert_16s_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc index ff24c7b0d..abceb52fb 100644 --- a/volk/lib/qa_32f_convert_32s_aligned16.cc +++ b/volk/lib/qa_32f_convert_32s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc index e63b17994..90f84b56f 100644 --- a/volk/lib/qa_32f_convert_32s_unaligned16.cc +++ b/volk/lib/qa_32f_convert_32s_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff 
--git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc index c546e47de..1d0754ac9 100644 --- a/volk/lib/qa_32f_convert_64f_aligned16.cc +++ b/volk/lib/qa_32f_convert_64f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc index 24b51f9af..6f7d5066d 100644 --- a/volk/lib/qa_32f_convert_64f_unaligned16.cc +++ b/volk/lib/qa_32f_convert_64f_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc index a3d4d6567..6a53629b5 100644 --- a/volk/lib/qa_32f_convert_8s_aligned16.cc +++ b/volk/lib/qa_32f_convert_8s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc index d885fd6bb..fbc5c20e6 100644 --- a/volk/lib/qa_32f_convert_8s_unaligned16.cc +++ b/volk/lib/qa_32f_convert_8s_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index 8826bf94f..3257a3751 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -21,6 +21,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index ca65add28..592304f83 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc index 2a937637f..a7ae60780 100644 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc +++ b/volk/lib/qa_32f_interleave_16sc_aligned16.cc @@ -2,6 +2,7 @@ 
#include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc index c22dd1046..333b6fce8 100644 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc +++ b/volk/lib/qa_32f_interleave_32fc_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc index 3ef375176..ceb913cb4 100644 --- a/volk/lib/qa_32f_max_aligned16.cc +++ b/volk/lib/qa_32f_max_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc index 617e18b24..580a60e7d 100644 --- a/volk/lib/qa_32f_min_aligned16.cc +++ b/volk/lib/qa_32f_min_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc index e52748466..0c242b649 100644 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32f_multiply_aligned16.cc @@ -21,6 +21,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc index 2954fc3ae..1c7b485a6 100644 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ b/volk/lib/qa_32f_normalize_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc index 9a5f71de0..62d55767a 100644 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ b/volk/lib/qa_32f_sqrt_aligned16.cc @@ -21,6 +21,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc index c0f22cdea..5934d70df 100644 --- a/volk/lib/qa_32f_stddev_aligned16.cc +++ b/volk/lib/qa_32f_stddev_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff 
--git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc index dcad8bcf3..78c701d78 100644 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc +++ b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc index a7e1b5ae3..ffe4b504c 100644 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ b/volk/lib/qa_32f_subtract_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc index a24382d71..c55ab5aa0 100644 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc +++ b/volk/lib/qa_32fc_atan2_32f_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc index 497914e0a..2f9a30395 100644 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc +++ b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include #define assertcomplexEqual(expected, actual, delta) \ diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc index 0f5a030f5..72e084c05 100644 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc +++ b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc index 6e051afbc..89770c236 100644 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc +++ b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc index 
850518524..7472476f7 100644 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc +++ b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc index 321deb184..5cbdc49b3 100644 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc +++ b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc index aedb2e387..4147e30ae 100644 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc +++ b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index a4be1616b..16984e30d 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc index d69ada408..b99f1ddcf 100644 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index 83cdf4b15..a3d0955bd 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse3 diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc index 4d1359068..1444c78a9 
100644 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse3 diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc index 72d05cf6f..661801709 100644 --- a/volk/lib/qa_32s_and_aligned16.cc +++ b/volk/lib/qa_32s_and_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc index eab3fe016..07d799809 100644 --- a/volk/lib/qa_32s_convert_32f_aligned16.cc +++ b/volk/lib/qa_32s_convert_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc index 0e504cfa1..2ec610ffb 100644 --- a/volk/lib/qa_32s_convert_32f_unaligned16.cc +++ b/volk/lib/qa_32s_convert_32f_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc index e09dfb91c..9da2ae344 100644 --- a/volk/lib/qa_32s_or_aligned16.cc +++ b/volk/lib/qa_32s_or_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc index 8b1023876..313c786b6 100644 --- a/volk/lib/qa_32u_byteswap_aligned16.cc +++ b/volk/lib/qa_32u_byteswap_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index 49fcddeb2..618a82a02 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc index 0eaebf00a..7f9c4584a 100644 --- 
a/volk/lib/qa_64f_convert_32f_aligned16.cc +++ b/volk/lib/qa_64f_convert_32f_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc index dcf94bd27..98aadbf4d 100644 --- a/volk/lib/qa_64f_convert_32f_unaligned16.cc +++ b/volk/lib/qa_64f_convert_32f_unaligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse2 diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc index 41ab078b0..76e755514 100644 --- a/volk/lib/qa_64f_max_aligned16.cc +++ b/volk/lib/qa_64f_max_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc index b4664d065..4b70d2881 100644 --- a/volk/lib/qa_64f_min_aligned16.cc +++ b/volk/lib/qa_64f_min_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc index 4f5d4d02b..20d012c9e 100644 --- a/volk/lib/qa_64u_byteswap_aligned16.cc +++ b/volk/lib/qa_64u_byteswap_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index bce9ff6c2..85ef58795 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc index 35f08fb81..8dd5f76ca 100644 --- a/volk/lib/qa_8s_convert_16s_aligned16.cc +++ b/volk/lib/qa_8s_convert_16s_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse4_1 diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc index bb326f818..12c502d4b 100644 --- 
a/volk/lib/qa_8s_convert_16s_unaligned16.cc +++ b/volk/lib/qa_8s_convert_16s_unaligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse4_1 diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc index 522da0b9d..672f5662f 100644 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ b/volk/lib/qa_8s_convert_32f_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse4.1 diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc index ea1fb7c74..43468b1b1 100644 --- a/volk/lib/qa_8s_convert_32f_unaligned16.cc +++ b/volk/lib/qa_8s_convert_32f_unaligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse4.1 diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc index 823e7fe2e..94e63e37d 100644 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc index fb580516c..29073eed7 100644 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc index 1cc844b52..4980c982a 100644 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc index 10e537cde..3c3f737a1 100644 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc @@ -3,6 +3,7 @@ 
#include #include #include +#include //test for sse diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc index d84df8119..a33d1bf30 100644 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc @@ -2,6 +2,7 @@ #include #include #include +#include //test for sse diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc index d64eac8ce..216bf1cef 100644 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc +++ b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc @@ -3,7 +3,7 @@ #include #include #include -#include +#include #define assertcomplexEqual(expected, actual, delta) \ CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc index c27f0e0ca..4c707446e 100644 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc +++ b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc @@ -3,7 +3,7 @@ #include #include #include -#include +#include #define assertcomplexEqual(expected, actual, delta) \ CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ -- cgit From c6fff77de9b686761f93f0e1de237f8543f5e919 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 17 Dec 2010 11:14:41 -0800 Subject: Volk: A bunch of new Orc routines plus a couple of build changes. 32fc_magnitude_16s fails test_all right now. 
--- volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 12 ++++++++++++ volk/lib/qa_16sc_deinterleave_32f_aligned16.cc | 11 +++++++++++ volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc | 9 +++++++++ volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 5 +++-- volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 6 +++--- volk/lib/qa_32f_max_aligned16.cc | 9 +++++++++ volk/lib/qa_32f_min_aligned16.cc | 9 +++++++++ volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 ++++---- volk/lib/qa_volk.cc | 1 - 9 files changed, 60 insertions(+), 10 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc index e700ac72c..7e9e31df5 100644 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc @@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { int16_t output_generic1[vlen] __attribute__ ((aligned (16))); int16_t output_sse2[vlen] __attribute__ ((aligned (16))); int16_t output_sse21[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); + int16_t output_orc1[vlen] __attribute__ ((aligned (16))); int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); @@ -43,6 +45,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); } @@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); 
CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); + + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]); } } diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc index 6ee076998..45100206d 100644 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc @@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { float output_generic1[vlen] __attribute__ ((aligned (16))); float output_sse2[vlen] __attribute__ ((aligned (16))); float output_sse21[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); + float output_orc1[vlen] __attribute__ ((aligned (16))); int16_t* loadInput = (int16_t*)input0; for(int i = 0; i < vlen*2; ++i) { @@ -41,6 +43,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); } @@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4); } } diff --git 
a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc index 5ab458bc9..d187d20c3 100644 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc @@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { int8_t output_generic[vlen] __attribute__ ((aligned (16))); int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); + int8_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t* loadInput = (int16_t*)input0; for(int i = 0; i < vlen*2; ++i) { @@ -39,6 +40,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); } @@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); } } diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index c8f13ff84..dd4ae75ff 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); - start = clock(); +/* start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); } end = clock(); 
total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); +*/ start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); @@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); + //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index e7178863c..53d42e28c 100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); - start = clock(); +/* start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); - +*/ start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); @@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); +// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32f_max_aligned16.cc 
b/volk/lib/qa_32f_max_aligned16.cc index 3ef375176..cb1fd3627 100644 --- a/volk/lib/qa_32f_max_aligned16.cc +++ b/volk/lib/qa_32f_max_aligned16.cc @@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_32f_max_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc index 617e18b24..bf453f360 100644 --- a/volk/lib/qa_32f_min_aligned16.cc +++ b/volk/lib/qa_32f_min_aligned16.cc @@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_32f_min_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_min_aligned16_manual(output02, input0, input1, 
vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index c3e65866b..105d32d0c 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } + //for(int i = 0; i < 10; ++i) { + // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); + //} for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index c3c27b69b..f6a334da7 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -118,7 +118,6 @@ CppUnit::TestSuite * qa_volk::suite() { CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); - s->addTest(qa_16s_quad_max_star_aligned16::suite()); s->addTest(qa_32fc_dot_prod_aligned16::suite()); s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); -- cgit From 200720da362e30f74083aad4dc106e4a057638bf Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 17 Dec 2010 12:20:16 -0800 Subject: Volk: Magnitude functions. 32fc_magnitude_16s currently clips to +MAX instead of -MAX. 
--- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 6 +++--- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index dd4ae75ff..d00315b57 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -40,14 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); -/* start = clock(); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); -*/ + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); @@ -73,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 105d32d0c..53b3bf790 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - //for(int i = 0; i < 10; ++i) { - // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - // printf("generic... %i, sse3... %i, orc... 
%i\n", output_generic[i], output_sse3[i], output_orc[i]); - //} + for(int i = 0; i < 10; ++i) { + printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); + } for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); -- cgit From 0e92b93f21fc9c324c379bc318120d414e7422cc Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 17 Dec 2010 13:35:40 -0800 Subject: Volk: Orc impl for 32fc_magnitude_16s saturates at -max instead of +max. --- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 6 +++--- volk/lib/qa_volk.cc | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 53b3bf790..93d4ec150 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,9 +63,9 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); + for(int i = 0; i < 1; ++i) { + // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + // printf("generic... %i, sse3... %i, orc... 
%i\n", output_generic[i], output_sse3[i], output_orc[i]); } for(int i = 0; i < vlen; ++i) { diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index f6a334da7..c3c27b69b 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -118,6 +118,7 @@ CppUnit::TestSuite * qa_volk::suite() { CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); + s->addTest(qa_16s_quad_max_star_aligned16::suite()); s->addTest(qa_32fc_dot_prod_aligned16::suite()); s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); -- cgit From 79c514b542d25e709903b41cfdc1673aae35ac1d Mon Sep 17 00:00:00 2001 From: Eric Blossom Date: Thu, 23 Dec 2010 14:29:56 -0800 Subject: Update volk .gitignores --- volk/lib/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'volk/lib') diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index 573fb1618..0f17543ab 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -19,3 +19,4 @@ /volk_mktables.c /volk_proccpu_sim.c /volk_runtime.c +/test_all -- cgit From 5b45b875ed58fd66234764a05da42c6eaff22c4d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 11 Jan 2011 15:17:55 -0800 Subject: Volk: Added more Orc routines (including complex multiply). Started redoing the testing framework so it's easier to add new archs to tests. 
--- volk/lib/Makefile.am | 2 + volk/lib/qa_32f_normalize_aligned16.cc | 13 +++++ volk/lib/qa_32fc_32f_multiply_aligned16.cc | 84 +++++++++++++----------------- volk/lib/qa_32fc_multiply_aligned16.cc | 12 +++++ 4 files changed, 64 insertions(+), 47 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 253033461..0aeafe4aa 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -156,6 +156,7 @@ endif # ---------------------------------------------------------------- libvolk_qa_la_SOURCES = \ qa_volk.cc \ + qa_utils.cc \ qa_16s_quad_max_star_aligned16.cc \ qa_32fc_dot_prod_aligned16.cc \ qa_32fc_square_dist_aligned16.cc \ @@ -257,6 +258,7 @@ libvolk_qa_la_LIBADD = \ noinst_HEADERS = \ volk_init.h \ qa_volk.h \ + qa_utils.h \ assembly.h \ qa_16s_quad_max_star_aligned16.h \ qa_32fc_dot_prod_aligned16.h \ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc index 1c7b485a6..0da43ecff 100644 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ b/volk/lib/qa_32f_normalize_aligned16.cc @@ -26,13 +26,16 @@ void qa_32f_normalize_aligned16::t1() { float* output0; float* output01; + float* output02; ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); + ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float)); for(int i = 0; i < vlen; ++i) { output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); } memcpy(output01, output0, vlen*sizeof(float)); + memcpy(output02, output0, vlen*sizeof(float)); printf("32f_normalize_aligned\n"); start = clock(); @@ -49,6 +52,14 @@ void qa_32f_normalize_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc"); + } + end = clock(); + total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + for(int i = 0; i < 1; ++i) { //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); @@ -57,10 +68,12 @@ void qa_32f_normalize_aligned16::t1() { for(int i = 0; i < vlen; ++i) { // printf("%e...%e\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4); } free(output0); free(output01); + free(output02); } #endif diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc index 4eba0a3cd..7bb8d21c1 100644 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc @@ -2,28 +2,12 @@ #include #include #include -#include #include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); +#include +#include #define ERR_DELTA (1e-4) -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 void qa_32fc_32f_multiply_aligned16::t1() { const int vlen = 2046; @@ -36,50 +20,56 @@ void qa_32fc_32f_multiply_aligned16::t1() { std::complex* input; float * taps; int i; + std::vector archs; + archs.push_back("generic"); +#ifdef LV_HAVE_SSE3 + archs.push_back("sse3"); +#endif +#ifdef LV_HAVE_ORC + archs.push_back("orc"); +#endif - std::complex* result_generic; - std::complex* result_sse3; + std::vector* > results; ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); ret = posix_memalign((void**)&taps, 16, 
vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float)); + + for(i=0; i < archs.size(); i++) { + std::complex *ptr; + ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float)); + if(ret) { + printf("Couldn't allocate memory\n"); + exit(1); + } + results.push_back(ptr); + } random_floats((float*)input, vlen * 2); random_floats(taps, vlen); printf("32fc_32f_multiply_aligned16\n"); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); + for(i=0; i < archs.size(); i++) { + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str()); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("%s_time: %f\n", archs[i].c_str(), total); } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + for(i=0; i < vlen; i++) { + int j = 1; + for(j; j < archs.size(); j++) { + assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA); + } } free(input); free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_32f_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); + for(i=0; i < archs.size(); i++) { + free(results[i]); + } } -#endif /* LV_HAVE_SSE3 */ - diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc index e1f7eab3d..022b58ad6 100644 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_multiply_aligned16.cc @@ -41,11 +41,13 @@ void qa_32fc_multiply_aligned16::t1() { std::complex* result_generic; std::complex* result_sse3; + std::complex* result_orc; ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float)); random_floats((float*)input, vlen * 2); random_floats((float*)taps, vlen * 2); @@ -67,15 +69,25 @@ void qa_32fc_multiply_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); for(i = 0; i < vlen; i++){ assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA); } free(input); free(taps); free(result_generic); free(result_sse3); + free(result_orc); } #else -- cgit From c77bb3e71562daa68e9a195a0131b7cc04324784 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 12 Jan 2011 19:20:35 -0800 Subject: Volk: Working on a new QA architecture that doesn't require individual test programs. 
--- volk/lib/Makefile.am | 2 - volk/lib/qa_32fc_32f_multiply_aligned16.cc | 6 +- volk/lib/qa_8sc_deinterleave_16s_aligned16.cc | 2 +- volk/lib/qa_utils.cc | 223 ++++++++++++++++++++++++++ volk/lib/qa_utils.h | 19 +++ volk/lib/qa_volk.cc | 2 +- 6 files changed, 247 insertions(+), 7 deletions(-) create mode 100644 volk/lib/qa_utils.cc create mode 100644 volk/lib/qa_utils.h (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 0aeafe4aa..a10b0a362 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -156,7 +156,6 @@ endif # ---------------------------------------------------------------- libvolk_qa_la_SOURCES = \ qa_volk.cc \ - qa_utils.cc \ qa_16s_quad_max_star_aligned16.cc \ qa_32fc_dot_prod_aligned16.cc \ qa_32fc_square_dist_aligned16.cc \ @@ -181,7 +180,6 @@ libvolk_qa_la_SOURCES = \ qa_32f_dot_prod_aligned16.cc \ qa_32f_dot_prod_unaligned16.cc \ qa_32f_fm_detect_aligned16.cc \ - qa_32fc_32f_multiply_aligned16.cc \ qa_32fc_multiply_aligned16.cc \ qa_32f_divide_aligned16.cc \ qa_32f_multiply_aligned16.cc \ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc index 7bb8d21c1..b80e0e008 100644 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc @@ -5,10 +5,11 @@ #include #include #include +#include -#define ERR_DELTA (1e-4) +#define TOLERANCE (1e-4) -void qa_32fc_32f_multiply_aligned16::t1() { +void qa_32fc_32f_multiply_aligned16(void) { const int vlen = 2046; const int ITERS = 100000; @@ -72,4 +73,3 @@ void qa_32fc_32f_multiply_aligned16::t1() { free(results[i]); } } - diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc index 94e63e37d..f753e1107 100644 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc @@ -40,7 +40,7 @@ void qa_8sc_deinterleave_16s_aligned16::t1() { start = clock(); for(int count = 0; count < ITERS; 
++count) { - volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc new file mode 100644 index 000000000..4d93ca62a --- /dev/null +++ b/volk/lib/qa_utils.cc @@ -0,0 +1,223 @@ +#include "qa_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +#include +#include +#include +#include +//#include + +float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +template +t *make_aligned_buffer(unsigned int len) { + t *buf; + int ret; + ret = posix_memalign((void**)&buf, 16, len * sizeof(t)); + assert(ret == 0); + return buf; +} + +void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { + BOOST_FOREACH(std::string sig, inputsig) { + if (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else std::cout << "Invalid type!" 
<< std::endl; + } +} + +static std::vector get_arch_list(const int archs[]) { + std::vector archlist; + int num_archs = archs[0]; + + //there has got to be a way to query these arches + for(int i = 0; i < num_archs; i++) { + switch(archs[i+1]) { + case (1< valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u"); + + BOOST_FOREACH(std::string this_type, valid_types) { + if(type == this_type) return true; + } + return false; +} + + +static void get_function_signature(std::vector &inputsig, + std::vector &outputsig, + std::string name) { + boost::char_separator sep("_"); + boost::tokenizer > tok(name, sep); + std::vector toked; + tok.assign(name); + toked.assign(tok.begin(), tok.end()); + + assert(toked[0] == "volk"); + + inputsig.push_back(toked[1]); //mandatory + int pos = 2; + bool valid_type = true; + while(valid_type && pos < toked.size()) { + if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]); + else valid_type = false; + pos++; + } + while(!valid_type && pos < toked.size()) { + if(is_valid_type(toked[pos])) valid_type = true; + pos++; + } + while(valid_type && pos < toked.size()) { + if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); + else valid_type = false; + pos++; + } + + //if there's no explicit output sig then assume the output is the same as the first input + if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); + assert(inputsig.size() != 0); + assert(outputsig.size() != 0); +} + +inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str()); +} + +inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str()); +} + +inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector 
&inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); +} + +bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) { + std::cout << "RUN_VOLK_TESTS: " << name << std::endl; + + //first let's get a list of available architectures for the test + std::vector arch_list = get_arch_list(archs); + + BOOST_FOREACH(std::string arch, arch_list) { + std::cout << "Found an arch: " << arch << std::endl; + } + + //now we have to get a function signature by parsing the name + std::vector inputsig, outputsig; + get_function_signature(inputsig, outputsig, name); + + for(int i=0; i inbuffs; + make_buffer_for_signature(inbuffs, inputsig, vlen); + + //and set the input buffers to something random + //TODO + + //allocate output buffers -- one for each output for each arch + std::vector outbuffs; + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + + //now run the test + clock_t start, end; + for(int i = 0; i < arch_list.size(); i++) { + start = clock(); + switch(outputsig.size()+inputsig.size()) { + case 2: + run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + break; + case 3: + run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + break; + case 4: + run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + break; + default: + break; + } + end = clock(); + std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + } + + //and now compare each output to the generic output + //first we have to know which output is the generic one, they aren't in order... 
+ int generic_offset; + for(int i=0; i +#include +#include + +float uniform(void); +void random_floats(float *buf, unsigned n); + +bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); + +#define VOLK_RUN_TESTS(func, tol, len, iter) run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter) + +typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); +typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); + +#endif //VOLK_QA_UTILS_H diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index c3c27b69b..8e7e59768 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -143,7 +143,7 @@ qa_volk::suite() s->addTest(qa_32f_dot_prod_aligned16::suite()); s->addTest(qa_32f_dot_prod_unaligned16::suite()); s->addTest(qa_32f_fm_detect_aligned16::suite()); - s->addTest(qa_32fc_32f_multiply_aligned16::suite()); + //s->addTest(qa_32fc_32f_multiply_aligned16::suite()); s->addTest(qa_32fc_multiply_aligned16::suite()); s->addTest(qa_32f_divide_aligned16::suite()); s->addTest(qa_32f_multiply_aligned16::suite()); -- cgit From 9a527257014878cac993ffe854bf8fdacc412be6 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 14 Jan 2011 13:07:06 -0800 Subject: Volk: QA code fixes, more Orc routines. Broke the 32fc_multiply Orc impl because I'm lame and lost some work. Fixed volk_8s_convert_16s Orc impl. Still need to rename functions and modify the QA sig parser to match. Then rewrite makefiles. 
--- volk/lib/qa_utils.cc | 94 ++++++++++++++++++++++++++++++++++++++++++---------- volk/lib/qa_utils.h | 2 +- 2 files changed, 77 insertions(+), 19 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 4d93ca62a..fa21db487 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -3,7 +3,7 @@ #include #include #include -#include +//#include #include #include #include @@ -13,19 +13,39 @@ #include #include #include -//#include float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) } -void -random_floats (float *buf, unsigned n) +void random_floats (float *buf, unsigned n) { for (unsigned i = 0; i < n; i++) buf[i] = uniform (); } +void load_random_data(void *data, std::string sig, unsigned int n) { + if(sig == "32fc") { + random_floats((float *)data, n*2); + } else if(sig == "32f") { + random_floats((float *)data, n); + } else if(sig == "32u") { + for(int i=0; i((RAND_MAX/2))) * 32768.0)); + } else if(sig == "16sc") { + for(int i=0; i((RAND_MAX/2))) * 32768.0)); + } else if(sig == "8u") { + for(int i=0; i((RAND_MAX/2)) * 256.0)); + } else if(sig == "8s") { + for(int i=0; i((RAND_MAX/2)) * 128.0)); + } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl; +} + template t *make_aligned_buffer(unsigned int len) { t *buf; @@ -37,11 +57,11 @@ t *make_aligned_buffer(unsigned int len) { void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { BOOST_FOREACH(std::string sig, inputsig) { - if (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else std::cout << "Invalid type!" 
<< std::endl; + if (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else std::cout << "Invalid type: " << sig << std::endl; } } @@ -90,7 +110,7 @@ static std::vector get_arch_list(const int archs[]) { } static bool is_valid_type(std::string type) { - std::vector valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u"); + std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u"); BOOST_FOREACH(std::string this_type, valid_types) { if(type == this_type) return true; @@ -120,16 +140,23 @@ static void get_function_signature(std::vector &inputsig, } while(!valid_type && pos < toked.size()) { if(is_valid_type(toked[pos])) valid_type = true; - pos++; + else pos++; } while(valid_type && pos < toked.size()) { if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); else valid_type = false; pos++; } - - //if there's no explicit output sig then assume the output is the same as the first input - if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); + + //if there's no output sig and only one input sig, assume there are 2 inputs + //this handles conversion fn's (which have a specified output sig) and most of the rest + if(outputsig.size() == 0 && inputsig.size() == 1) { + outputsig.push_back(inputsig[0]); + inputsig.push_back(inputsig[0]); + }//if there's no explicit output sig then assume the output is the same as the first input + else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); + + assert(inputsig.size() != 0); assert(outputsig.size() != 0); } @@ -168,7 +195,9 @@ bool 
run_volk_tests(const int archs[], void (*manual_func)(), std::string name, make_buffer_for_signature(inbuffs, inputsig, vlen); //and set the input buffers to something random - //TODO + for(int i=0; i outbuffs; @@ -204,9 +233,38 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(arch_list[i] == "generic") generic_offset=i; for(int i=0; i tol) { + std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; + return 1; + } + } + } else if(outputsig[0] == "32f") { + for(int j=0; j tol) { + std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; + return 1; + } + } + } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") { + for(int j=0; j get_arch_list(const int archs[]) { } static bool is_valid_type(std::string type) { - std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u"); + std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f") + ("32s")("32u")("16sc")("16s") + ("16u")("8s")("8sc")("8u") + ("s32f")("s16u")("s16s")("s8u") + ("s8s"); BOOST_FOREACH(std::string this_type, valid_types) { if(type == this_type) return true; @@ -148,17 +152,11 @@ static void get_function_signature(std::vector &inputsig, pos++; } - //if there's no output sig and only one input sig, assume there are 2 inputs - //this handles conversion fn's (which have a specified output sig) and most of the rest - if(outputsig.size() == 0 && inputsig.size() == 1) { - outputsig.push_back(inputsig[0]); - inputsig.push_back(inputsig[0]); - }//if there's no explicit output sig then assume the output is the same as the first input - else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); - - assert(inputsig.size() != 0); - assert(outputsig.size() != 0); +} + +inline 
void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buff, vlen, arch.c_str()); } inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { @@ -190,26 +188,42 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, for(int i=0; i inbuffs; make_buffer_for_signature(inbuffs, inputsig, vlen); + //allocate output buffers -- one for each output for each arch + std::vector outbuffs; + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + //and set the input buffers to something random for(int i=0; i outbuffs; - BOOST_FOREACH(std::string arch, arch_list) { - make_buffer_for_signature(outbuffs, outputsig, vlen); + //so let's see here. if the operation has no output sig, it operates in place, + //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers. 
+ if(outputsig.size() == 0) { + //make a set of output buffers according to the input signature + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, inputsig, vlen); + } + //copy input buffer[0] to all the output buffers so it has something to operate on + //output buffer element size is the same as input buffer[0] + if( } + //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { start = clock(); switch(outputsig.size()+inputsig.size()) { + case 1: + run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); + break; case 2: run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); break; @@ -262,6 +276,13 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, return 1; } } + } else if(outputsig[0] == "8s" || outputsig[0] == "8u") { + for(int j=0; j #include -#include float uniform(void); void random_floats(float *buf, unsigned n); @@ -12,6 +11,7 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); -- cgit From be1b7d9ffb90aa9c750e6c6793f00dbc8bec486d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 19 Jan 2011 16:39:28 -0800 Subject: Volk: test suite supports scalar arguments and in-place operations --- volk/lib/qa_utils.cc | 357 +++++++++++++++++++++++++++++++-------------------- volk/lib/qa_utils.h | 15 ++- 2 files changed, 231 insertions(+), 141 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc 
b/volk/lib/qa_utils.cc index a8c00c143..e73b70985 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -7,7 +7,8 @@ #include #include #include -//#include +#include +#include //#include #include #include @@ -24,44 +25,53 @@ void random_floats (float *buf, unsigned n) buf[i] = uniform (); } -void load_random_data(void *data, std::string sig, unsigned int n) { - if(sig == "32fc") { - random_floats((float *)data, n*2); - } else if(sig == "32f") { +void load_random_data(void *data, volk_type_t type, unsigned int n) { + if(type.is_complex) n *= 2; + if(type.is_float) { + assert(type.size == 4); //TODO: double support random_floats((float *)data, n); - } else if(sig == "32u") { - for(int i=0; i((RAND_MAX/2))) * 32768.0)); - } else if(sig == "16sc") { - for(int i=0; i((RAND_MAX/2))) * 32768.0)); - } else if(sig == "8u") { - for(int i=0; i((RAND_MAX/2)) * 256.0)); - } else if(sig == "8s") { - for(int i=0; i((RAND_MAX/2)) * 128.0)); - } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl; + } else { + float int_max = pow(2, type.size*8); + if(type.is_signed) int_max /= 2.0; + for(int i=0; i((RAND_MAX/2))) * int_max; + //man i really don't know how to do this in a more clever way, you have to cast down at some point + switch(type.size) { + case 8: + if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; + else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; + break; + case 4: + if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; + else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; + break; + case 2: + if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; + else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; + break; + case 1: + if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; + else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; + break; + default: + throw; //no shenanigans here + } + } + } } -template -t *make_aligned_buffer(unsigned int len) { - t *buf; +void *make_aligned_buffer(unsigned 
int len, unsigned int size) { + void *buf; int ret; - ret = posix_memalign((void**)&buf, 16, len * sizeof(t)); + ret = posix_memalign((void**)&buf, 16, len * size); assert(ret == 0); return buf; } -void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { - BOOST_FOREACH(std::string sig, inputsig) { - if (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else std::cout << "Invalid type: " << sig << std::endl; +void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { + BOOST_FOREACH(volk_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1))); } } @@ -109,22 +119,56 @@ static std::vector get_arch_list(const int archs[]) { return archlist; } -static bool is_valid_type(std::string type) { - std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f") - ("32s")("32u")("16sc")("16s") - ("16u")("8s")("8sc")("8u") - ("s32f")("s16u")("s16s")("s8u") - ("s8s"); +volk_type_t volk_type_from_string(std::string name) { + volk_type_t type; + type.is_float = false; + type.is_scalar = false; + type.is_complex = false; + type.is_signed = false; + type.size = 0; + type.str = name; + + assert(name.size() > 1); - BOOST_FOREACH(std::string this_type, valid_types) { - if(type == this_type) return true; + //is it a scalar? 
+ if(name[0] == 's') { + type.is_scalar = true; + name = name.substr(1, name.size()-1); + } + + //get the data size + int last_size_pos = name.find_last_of("0123456789"); + if(last_size_pos < 0) throw 0; + //will throw if malformed + int size = boost::lexical_cast(name.substr(0, last_size_pos+1)); + + assert(((size % 8) == 0) && (size <= 64) && (size != 0)); + type.size = size/8; //in bytes + + for(int i=last_size_pos+1; i < name.size(); i++) { + switch (name[i]) { + case 'f': + type.is_float = true; + break; + case 'i': + type.is_signed = true; + break; + case 'c': + type.is_complex = true; + break; + case 'u': + type.is_signed = false; + break; + default: + throw; + } } - return false; -} + return type; +} -static void get_function_signature(std::vector &inputsig, - std::vector &outputsig, +static void get_signatures_from_name(std::vector &inputsig, + std::vector &outputsig, std::string name) { boost::char_separator sep("_"); boost::tokenizer > tok(name, sep); @@ -133,25 +177,38 @@ static void get_function_signature(std::vector &inputsig, toked.assign(tok.begin(), tok.end()); assert(toked[0] == "volk"); - - inputsig.push_back(toked[1]); //mandatory - int pos = 2; - bool valid_type = true; - while(valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]); - else valid_type = false; - pos++; - } - while(!valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) valid_type = true; - else pos++; - } - while(valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); - else valid_type = false; - pos++; + toked.erase(toked.begin()); + + //ok. 
we're assuming a string in the form + //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) + + enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT; + std::string fn_name; + volk_type_t type; + BOOST_FOREACH(std::string token, toked) { + try { + type = volk_type_from_string(token); + if(side == SIDE_INPUT) inputsig.push_back(type); + else outputsig.push_back(type); + } catch (...){ + if(token[0] == 'x') { //it's a multiplier + if(side == SIDE_INPUT) assert(inputsig.size() > 0); + else assert(outputsig.size() > 0); + int multiplier = boost::lexical_cast(token.substr(1, token.size()-1)); //will throw if invalid + for(int i=1; i while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); } +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buff, scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); +} + +template +bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + for(int i=0; i tol) return 1; + } + return 0; +} + +template +bool icompare(t *in1, t *in2, unsigned int vlen) { + for(int i=0; i arch_list = get_arch_list(archs); - BOOST_FOREACH(std::string arch, arch_list) { - std::cout << "Found an arch: " << arch << std::endl; - } - //now we have to get a function signature by parsing the name - std::vector inputsig, outputsig; - get_function_signature(inputsig, outputsig, name); - - for(int i=0; i inbuffs; - make_buffer_for_signature(inbuffs, inputsig, vlen); + std::vector inputsig, outputsig; + get_signatures_from_name(inputsig, outputsig, name); - //allocate output buffers -- one for each output for each arch - std::vector outbuffs; - BOOST_FOREACH(std::string arch, arch_list) { - 
make_buffer_for_signature(outbuffs, outputsig, vlen); - } - - //and set the input buffers to something random + std::vector inputsc, outputsc; for(int i=0; i inbuffs, outbuffs; - //so let's see here. if the operation has no output sig, it operates in place, - //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers. - if(outputsig.size() == 0) { - //make a set of output buffers according to the input signature - BOOST_FOREACH(std::string arch, arch_list) { + if(outputsig.size() == 0) { //we're operating in place... + //assert(inputsig.size() == 1); //we only support 0 output 1 input right now... + make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer + load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data + BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer make_buffer_for_signature(outbuffs, inputsig, vlen); + memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1)); + } + } else { + make_buffer_for_signature(inbuffs, inputsig, vlen); + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + + //and set the input buffers to something random + for(int i=0; i1 scalars"; break; case 2: - run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + if(inputsc.size() == 0) { + run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]); + } else throw "unsupported 2 arg function >1 scalars"; break; case 3: run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); @@ -234,69 +328,52 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string 
name, run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); break; default: + throw "no function handler for this signature"; break; } + end = clock(); std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; } - //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset; for(int i=0; i tol) { - std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "32f") { - for(int j=0; j tol) { - std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") { - for(int j=0; j #include +struct volk_type_t { + bool is_float; + bool is_scalar; + bool is_signed; + bool is_complex; + int size; + std::string str; +}; + +volk_type_t volk_type_from_string(std::string); + float uniform(void); void random_floats(float *buf, unsigned n); @@ -11,9 +22,11 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) -typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, 
one scalar float input +typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*); #endif //VOLK_QA_UTILS_H -- cgit From e3600f59e76c3dc08aedfd77629b7c5c48df86af Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 20 Jan 2011 16:30:09 -0800 Subject: volk: renamed all files. added all tests. some test things are still broken. --- volk/lib/qa_utils.cc | 101 +++++++++++++++++++++++++++++++-------------------- volk/lib/qa_utils.h | 1 + 2 files changed, 62 insertions(+), 40 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index e73b70985..4c151bd6f 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -19,7 +19,8 @@ float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) } -void random_floats (float *buf, unsigned n) +template +void random_floats (t *buf, unsigned n) { for (unsigned i = 0; i < n; i++) buf[i] = uniform (); @@ -28,8 +29,8 @@ void random_floats (float *buf, unsigned n) void load_random_data(void *data, volk_type_t type, unsigned int n) { if(type.is_complex) n *= 2; if(type.is_float) { - assert(type.size == 4); //TODO: double support - random_floats((float *)data, n); + if(type.size == 8) random_floats((double *)data, n); + else random_floats((float *)data, n); } else { float int_max = pow(2, type.size*8); if(type.is_signed) int_max /= 2.0; @@ -54,7 +55,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; break; default: - throw; //no shenanigans here + throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here } } } @@ -94,6 +95,9 @@ static std::vector get_arch_list(const int archs[]) { case (1< 1); + if(name.size() < 2) throw std::string("name too short to be a datatype"); //is it a scalar? 
if(name[0] == 's') { @@ -138,7 +142,7 @@ volk_type_t volk_type_from_string(std::string name) { //get the data size int last_size_pos = name.find_last_of("0123456789"); - if(last_size_pos < 0) throw 0; + if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); //will throw if malformed int size = boost::lexical_cast(name.substr(0, last_size_pos+1)); @@ -182,12 +186,14 @@ static void get_signatures_from_name(std::vector &inputsig, //ok. we're assuming a string in the form //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) - enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT; + enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; std::string fn_name; volk_type_t type; BOOST_FOREACH(std::string token, toked) { try { type = volk_type_from_string(token); + if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... + if(side == SIDE_INPUT) inputsig.push_back(type); else outputsig.push_back(type); } catch (...){ @@ -201,9 +207,11 @@ static void get_signatures_from_name(std::vector &inputsig, } } else if(side == SIDE_INPUT) { //it's the function name, at least it better be - side = SIDE_OUTPUT; - fn_name = token; - } else { + side = SIDE_NAME; + fn_name.append("_"); + fn_name.append(token); + } + else if(side == SIDE_OUTPUT) { if(token != toked.back()) throw; //the last token in the name is the alignment } } @@ -236,20 +244,40 @@ inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vect while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); } +inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str()); +} + template bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; for(int i=0; i tol) return 1; + if(fabs(((t 
*)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; + } + } } - return 0; + + return fail; } template -bool icompare(t *in1, t *in2, unsigned int vlen) { +bool icompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; for(int i=0; i 0) { + std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl; + } + } } - return 0; + + return fail; } bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) { @@ -300,7 +328,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, load_random_data(inbuffs[i], inputsig[i], vlen); } } - + //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { @@ -311,18 +339,22 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(inputsc.size() == 0) { run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]); + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: - 
run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + if(inputsc.size() == 0) { + run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); + } else throw "unsupported 3 arg function >1 scalars"; break; case 4: run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); @@ -337,29 +369,24 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, } //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... - int generic_offset; + int generic_offset=0; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { @@ -265,14 +266,14 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { } template -bool icompare(t *in1, t *in2, unsigned int vlen, float tol) { +bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { - std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl; + std::cout << "offset " << i << " in1: " << static_cast(t(((t *)(in1))[i])) << " in2: " << static_cast(t(((t *)(in2))[i])) << std::endl; } } } @@ -339,21 +340,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(inputsc.size() == 0) { run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]); + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function 
>1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: @@ -375,7 +376,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //now compare if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know - + //TODO: loop over the output signature as well bool fail = false; for(int i=0; i &inputsig, assert(inputsig.size() != 0); } -inline void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(buff, vlen, arch.c_str()); +inline void run_cast_test1(volk_fn_1arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], vlen, arch.c_str()); } -inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str()); +inline void run_cast_test2(volk_fn_2arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], vlen, 
arch.c_str()); } -inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str()); +inline void run_cast_test3(volk_fn_3arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); } -inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); +inline void run_cast_test4(volk_fn_4arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); } -inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(buff, scalar, vlen, arch.c_str()); +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } -inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); +inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } -inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str()); +inline void 
run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } template @@ -253,7 +254,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { @@ -291,74 +292,70 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, std::vector inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); - std::vector inputsc, outputsc; + //pull the input scalars into their own vector + std::vector inputsc; for(int i=0; i inbuffs, outbuffs; + std::vector inbuffs; + + make_buffer_for_signature(inbuffs, inputsig, vlen); + for(int i=0; i > test_data; + for(int i=0; i arch_buffs; + for(int j=0; j both_sigs; + both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); + both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { start = clock(); - switch(inputsig.size() + outputsig.size()) { + switch(both_sigs.size()) { case 1: if(inputsc.size() == 0) { - run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); + run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]); + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { - run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() 
== 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { - run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: - run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); break; default: throw "no function handler for this signature"; @@ -375,61 +372,63 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(arch_list[i] == "generic") generic_offset=i; //now compare - if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know - //TODO: loop over the output signature as well + //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know + bool fail = false; + bool fail_global = false; for(int i=0; i1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: 
if(inputsc.size() == 0) { run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 79fc8f006..e2539060a 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -18,9 +18,9 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); +bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); -#define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 4dd7f7599..9f4934dc0 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -7,93 +7,93 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { //in order... 
// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); // VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 2046, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000); + VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000); // VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); // VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); } -- cgit From 6091bad60cdfdf21624da452c7a8b74405345070 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 21 Jan 2011 15:41:30 -0800 Subject: Volk: removed all the old QA code that is covered by the test framework. --- volk/lib/Makefile.am | 1 - volk/lib/qa_16s_convert_32f_aligned16.cc | 74 ------- volk/lib/qa_16s_convert_32f_aligned16.h | 18 -- volk/lib/qa_16s_convert_32f_unaligned16.cc | 74 ------- volk/lib/qa_16s_convert_32f_unaligned16.h | 18 -- volk/lib/qa_16s_convert_8s_aligned16.cc | 61 ------ volk/lib/qa_16s_convert_8s_aligned16.h | 18 -- volk/lib/qa_16s_convert_8s_unaligned16.cc | 61 ------ volk/lib/qa_16s_convert_8s_unaligned16.h | 18 -- volk/lib/qa_16s_max_star_aligned16.cc | 65 ------- volk/lib/qa_16s_max_star_aligned16.h | 18 -- volk/lib/qa_16s_max_star_horizontal_aligned16.cc | 79 -------- volk/lib/qa_16s_max_star_horizontal_aligned16.h | 18 -- volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 89 --------- volk/lib/qa_16sc_deinterleave_16s_aligned16.h | 18 -- volk/lib/qa_16sc_deinterleave_32f_aligned16.cc | 75 -------- volk/lib/qa_16sc_deinterleave_32f_aligned16.h | 18 -- .../lib/qa_16sc_deinterleave_real_16s_aligned16.cc | 72 ------- volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h | 18 -- .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 124 ------------ volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h | 18 -- volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc | 70 ------- volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h | 18 -- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 81 -------- 
volk/lib/qa_16sc_magnitude_16s_aligned16.h | 18 -- volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 131 ------------- volk/lib/qa_16sc_magnitude_32f_aligned16.h | 18 -- volk/lib/qa_16u_byteswap_aligned16.cc | 71 ------- volk/lib/qa_16u_byteswap_aligned16.h | 18 -- volk/lib/qa_32f_accumulator_aligned16.cc | 57 ------ volk/lib/qa_32f_accumulator_aligned16.h | 18 -- volk/lib/qa_32f_add_aligned16.cc | 123 ------------ volk/lib/qa_32f_add_aligned16.h | 18 -- .../qa_32f_calc_spectral_noise_floor_aligned16.cc | 60 ------ .../qa_32f_calc_spectral_noise_floor_aligned16.h | 18 -- volk/lib/qa_32f_convert_16s_aligned16.cc | 71 ------- volk/lib/qa_32f_convert_16s_aligned16.h | 18 -- volk/lib/qa_32f_convert_16s_unaligned16.cc | 71 ------- volk/lib/qa_32f_convert_16s_unaligned16.h | 18 -- volk/lib/qa_32f_convert_32s_aligned16.cc | 71 ------- volk/lib/qa_32f_convert_32s_aligned16.h | 18 -- volk/lib/qa_32f_convert_32s_unaligned16.cc | 71 ------- volk/lib/qa_32f_convert_32s_unaligned16.h | 18 -- volk/lib/qa_32f_convert_64f_aligned16.cc | 61 ------ volk/lib/qa_32f_convert_64f_aligned16.h | 18 -- volk/lib/qa_32f_convert_64f_unaligned16.cc | 61 ------ volk/lib/qa_32f_convert_64f_unaligned16.h | 18 -- volk/lib/qa_32f_convert_8s_aligned16.cc | 71 ------- volk/lib/qa_32f_convert_8s_aligned16.h | 18 -- volk/lib/qa_32f_convert_8s_unaligned16.cc | 71 ------- volk/lib/qa_32f_convert_8s_unaligned16.h | 18 -- volk/lib/qa_32f_divide_aligned16.cc | 133 ------------- volk/lib/qa_32f_divide_aligned16.h | 18 -- volk/lib/qa_32f_dot_prod_aligned16.cc | 183 ------------------ volk/lib/qa_32f_dot_prod_aligned16.h | 18 -- volk/lib/qa_32f_dot_prod_unaligned16.cc | 190 ------------------ volk/lib/qa_32f_dot_prod_unaligned16.h | 18 -- volk/lib/qa_32f_interleave_16sc_aligned16.cc | 76 -------- volk/lib/qa_32f_interleave_16sc_aligned16.h | 18 -- volk/lib/qa_32f_interleave_32fc_aligned16.cc | 63 ------ volk/lib/qa_32f_interleave_32fc_aligned16.h | 18 -- volk/lib/qa_32f_max_aligned16.cc | 70 ------- 
volk/lib/qa_32f_max_aligned16.h | 18 -- volk/lib/qa_32f_min_aligned16.cc | 70 ------- volk/lib/qa_32f_min_aligned16.h | 18 -- volk/lib/qa_32f_multiply_aligned16.cc | 123 ------------ volk/lib/qa_32f_multiply_aligned16.h | 18 -- volk/lib/qa_32f_normalize_aligned16.cc | 79 -------- volk/lib/qa_32f_normalize_aligned16.h | 18 -- volk/lib/qa_32f_power_aligned16.cc | 95 --------- volk/lib/qa_32f_power_aligned16.h | 18 -- volk/lib/qa_32f_sqrt_aligned16.cc | 128 ------------ volk/lib/qa_32f_sqrt_aligned16.h | 18 -- volk/lib/qa_32f_stddev_aligned16.cc | 75 -------- volk/lib/qa_32f_stddev_aligned16.h | 18 -- volk/lib/qa_32f_stddev_and_mean_aligned16.cc | 76 -------- volk/lib/qa_32f_stddev_and_mean_aligned16.h | 18 -- volk/lib/qa_32f_subtract_aligned16.cc | 70 ------- volk/lib/qa_32f_subtract_aligned16.h | 18 -- volk/lib/qa_32f_sum_of_poly_aligned16.cc | 142 -------------- volk/lib/qa_32f_sum_of_poly_aligned16.h | 18 -- volk/lib/qa_32fc_32f_multiply_aligned16.cc | 75 -------- volk/lib/qa_32fc_32f_multiply_aligned16.h | 18 -- volk/lib/qa_32fc_32f_power_32fc_aligned16.cc | 83 -------- volk/lib/qa_32fc_32f_power_32fc_aligned16.h | 18 -- volk/lib/qa_32fc_atan2_32f_aligned16.cc | 76 -------- volk/lib/qa_32fc_atan2_32f_aligned16.h | 18 -- volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc | 138 ------------- volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h | 18 -- volk/lib/qa_32fc_deinterleave_32f_aligned16.cc | 64 ------ volk/lib/qa_32fc_deinterleave_32f_aligned16.h | 18 -- volk/lib/qa_32fc_deinterleave_64f_aligned16.cc | 64 ------ volk/lib/qa_32fc_deinterleave_64f_aligned16.h | 18 -- .../lib/qa_32fc_deinterleave_real_16s_aligned16.cc | 61 ------ volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h | 18 -- .../lib/qa_32fc_deinterleave_real_32f_aligned16.cc | 61 ------ volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h | 18 -- .../lib/qa_32fc_deinterleave_real_64f_aligned16.cc | 61 ------ volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h | 18 -- volk/lib/qa_32fc_dot_prod_aligned16.cc | 
214 --------------------- volk/lib/qa_32fc_dot_prod_aligned16.h | 20 -- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 80 -------- volk/lib/qa_32fc_magnitude_16s_aligned16.h | 18 -- volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 80 -------- volk/lib/qa_32fc_magnitude_32f_aligned16.h | 18 -- volk/lib/qa_32fc_multiply_aligned16.cc | 98 ---------- volk/lib/qa_32fc_multiply_aligned16.h | 18 -- volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc | 64 ------ volk/lib/qa_32fc_power_spectrum_32f_aligned16.h | 18 -- volk/lib/qa_32fc_square_dist_aligned16.cc | 91 --------- volk/lib/qa_32fc_square_dist_aligned16.h | 18 -- .../qa_32fc_square_dist_scalar_mult_aligned16.cc | 96 --------- .../qa_32fc_square_dist_scalar_mult_aligned16.h | 18 -- volk/lib/qa_32s_and_aligned16.cc | 70 ------- volk/lib/qa_32s_and_aligned16.h | 18 -- volk/lib/qa_32s_convert_32f_aligned16.cc | 61 ------ volk/lib/qa_32s_convert_32f_aligned16.h | 18 -- volk/lib/qa_32s_convert_32f_unaligned16.cc | 61 ------ volk/lib/qa_32s_convert_32f_unaligned16.h | 18 -- volk/lib/qa_32s_or_aligned16.cc | 70 ------- volk/lib/qa_32s_or_aligned16.h | 18 -- volk/lib/qa_32u_byteswap_aligned16.cc | 60 ------ volk/lib/qa_32u_byteswap_aligned16.h | 18 -- volk/lib/qa_64f_convert_32f_aligned16.cc | 61 ------ volk/lib/qa_64f_convert_32f_aligned16.h | 18 -- volk/lib/qa_64f_convert_32f_unaligned16.cc | 61 ------ volk/lib/qa_64f_convert_32f_unaligned16.h | 18 -- volk/lib/qa_64f_max_aligned16.cc | 61 ------ volk/lib/qa_64f_max_aligned16.h | 18 -- volk/lib/qa_64f_min_aligned16.cc | 61 ------ volk/lib/qa_64f_min_aligned16.h | 18 -- volk/lib/qa_64u_byteswap_aligned16.cc | 60 ------ volk/lib/qa_64u_byteswap_aligned16.h | 18 -- volk/lib/qa_8s_convert_16s_aligned16.cc | 64 ------ volk/lib/qa_8s_convert_16s_aligned16.h | 18 -- volk/lib/qa_8s_convert_16s_unaligned16.cc | 64 ------ volk/lib/qa_8s_convert_16s_unaligned16.h | 18 -- volk/lib/qa_8s_convert_32f_aligned16.cc | 72 ------- volk/lib/qa_8s_convert_32f_aligned16.h | 18 -- 
volk/lib/qa_8s_convert_32f_unaligned16.cc | 64 ------ volk/lib/qa_8s_convert_32f_unaligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_16s_aligned16.cc | 68 ------- volk/lib/qa_8sc_deinterleave_16s_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_32f_aligned16.cc | 135 ------------- volk/lib/qa_8sc_deinterleave_32f_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc | 65 ------- volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 139 ------------- volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc | 61 ------ volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h | 18 -- .../qa_8sc_multiply_conjugate_16sc_aligned16.cc | 87 --------- .../lib/qa_8sc_multiply_conjugate_16sc_aligned16.h | 18 -- .../qa_8sc_multiply_conjugate_32fc_aligned16.cc | 87 --------- .../lib/qa_8sc_multiply_conjugate_32fc_aligned16.h | 18 -- volk/lib/qa_volk.cc | 211 -------------------- volk/lib/qa_volk.h | 36 ---- volk/lib/test_all.cc | 82 -------- 158 files changed, 8144 deletions(-) delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.cc delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.h delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.cc delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.h delete mode 100644 volk/lib/qa_16s_max_star_aligned16.cc delete mode 100644 volk/lib/qa_16s_max_star_aligned16.h delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.cc delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.h delete mode 100644 
volk/lib/qa_16sc_deinterleave_32f_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.h delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.cc delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.h delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.cc delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.h delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.cc delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.h delete mode 100644 volk/lib/qa_32f_add_aligned16.cc delete mode 100644 volk/lib/qa_32f_add_aligned16.h delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.h delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.h delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.h delete 
mode 100644 volk/lib/qa_32f_convert_8s_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_8s_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.h delete mode 100644 volk/lib/qa_32f_divide_aligned16.cc delete mode 100644 volk/lib/qa_32f_divide_aligned16.h delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.cc delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.h delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.cc delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.h delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.cc delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.h delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.cc delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.h delete mode 100644 volk/lib/qa_32f_max_aligned16.cc delete mode 100644 volk/lib/qa_32f_max_aligned16.h delete mode 100644 volk/lib/qa_32f_min_aligned16.cc delete mode 100644 volk/lib/qa_32f_min_aligned16.h delete mode 100644 volk/lib/qa_32f_multiply_aligned16.cc delete mode 100644 volk/lib/qa_32f_multiply_aligned16.h delete mode 100644 volk/lib/qa_32f_normalize_aligned16.cc delete mode 100644 volk/lib/qa_32f_normalize_aligned16.h delete mode 100644 volk/lib/qa_32f_power_aligned16.cc delete mode 100644 volk/lib/qa_32f_power_aligned16.h delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.cc delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.h delete mode 100644 volk/lib/qa_32f_stddev_aligned16.cc delete mode 100644 volk/lib/qa_32f_stddev_aligned16.h delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.cc delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.h delete mode 100644 volk/lib/qa_32f_subtract_aligned16.cc delete mode 100644 volk/lib/qa_32f_subtract_aligned16.h delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.cc delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.h delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.cc 
delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.h delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.h delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.cc delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.h delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.cc delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.h delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.h delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.cc delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.h delete mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc delete mode 100644 
volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h delete mode 100644 volk/lib/qa_32s_and_aligned16.cc delete mode 100644 volk/lib/qa_32s_and_aligned16.h delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_32s_or_aligned16.cc delete mode 100644 volk/lib/qa_32s_or_aligned16.h delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.cc delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.h delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_64f_max_aligned16.cc delete mode 100644 volk/lib/qa_64f_max_aligned16.h delete mode 100644 volk/lib/qa_64f_min_aligned16.cc delete mode 100644 volk/lib/qa_64f_min_aligned16.h delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.cc delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.h delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.cc delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.h delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.cc delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.h delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc delete mode 100644 
volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h delete mode 100644 volk/lib/qa_volk.cc delete mode 100644 volk/lib/qa_volk.h delete mode 100644 volk/lib/test_all.cc (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 63df85244..bbc993fa2 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -110,7 +110,6 @@ endif # ---------------------------------------------------------------- noinst_HEADERS = \ volk_init.h \ - qa_volk.h \ qa_utils.h \ assembly.h diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc deleted file mode 100644 index 6215f4a64..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h deleted file mode 100644 index ef813d96f..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc deleted file mode 100644 index 46c2e48ac..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_unaligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h deleted file mode 100644 index aeb04f770..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc deleted file mode 100644 index 8225aa0cf..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h deleted file mode 100644 index 2e409d0cc..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H - -#include -#include - -class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc deleted file mode 100644 index e6ce5030e..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h deleted file mode 100644 index 4b2fe9e42..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H - -#include -#include - -class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc deleted file mode 100644 index c6f828ba6..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include -#include -#include -#include -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - - -void qa_16s_max_star_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 6400; - const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[1] __attribute__ ((aligned (16))); - - short output1[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h deleted file mode 100644 index 119f87c4d..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H - -#include -#include - -class qa_16s_max_star_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc deleted file mode 100644 index 0a58570e2..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_horizontal_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - -void qa_16s_max_star_horizontal_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 32; - const int ITERS = 1; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[vlen>>1] __attribute__ ((aligned (16))); - - short output1[vlen>>1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))); - - short minus0 = ((short) (rand() - (RAND_MAX/2))); - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_horizontal_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - /* volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/ - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < (vlen >> 1); ++i) { - // printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - - } - for(int i = 0; i < (vlen >> 1); ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } - } - - -#endif - diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h deleted file mode 100644 index 9f9757253..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H - -#include -#include - -class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index aadc39067..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,89 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_sse21[vlen] __attribute__ ((aligned (16))); - int16_t output_orc[vlen] __attribute__ ((aligned (16))); - int16_t output_orc1[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3"); - } - end = 
clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse21[i]); - - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); - - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 995ab5b34..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 13151be13..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16sc_deinterleave_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - float output_sse21[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_orc1[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h deleted file mode 100644 index fea3b6c2d..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index c67064ea6..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - // printf("%d = generic... %d, sse2... %d, ssse3... 
%d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index ebb70b97a..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index f86f03b88..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE */ - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE4_1 */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index e83426473..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index 803caaa2d..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - int8_t output_orc[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 04e5511e5..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc deleted file mode 100644 index 7fbdd8620..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_orc[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h deleted file mode 100644 index 4664b70f4..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H - -#include -#include - -class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc deleted file mode 100644 index 54cc2ba6e..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_32f_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2)); - } - printf("16sc_magnitude_32f_aligned\n"); - - float scale = 32768.0; - for(int i = 0; i < vlen; ++i) { - float re = (float)(input0[i].real())/scale; - float im = 
(float)(input0[i].imag())/scale; - output_known[i] = sqrt(re*re + im*im); - } - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - /* - for(int i = 0; i < 100; ++i) { - printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag()); - printf("generic... %f == %f\n", output_generic[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4); - } -} - -#else - -void qa_16sc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("16sc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); -/* start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); -*/ - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); -// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h deleted file mode 100644 index 0c25673ea..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H - -#include -#include - -class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc deleted file mode 100644 index c2295968b..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint16_t output0[vlen] __attribute__ ((aligned (16))); - uint16_t output01[vlen] __attribute__ ((aligned (16))); - uint16_t output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint16_t)); - memcpy(output02, output0, vlen*sizeof(uint16_t)); - - printf("16u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output02, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h deleted file mode 100644 index e11b23e3f..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H - -#include -#include - -class qa_16u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc deleted file mode 100644 index 0defef283..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_accumulator_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_accumulator_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float accumulator_generic; - float accumulator_sse; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_accumulator_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4); -} - -#endif diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h deleted file mode 100644 index 0004d3ff0..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H -#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H - -#include -#include - -class qa_32f_accumulator_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc deleted file mode 100644 index a183d4d85..000000000 --- a/volk/lib/qa_32f_add_aligned16.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . 
- */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_add_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - output_known[i] = input0[i] + input1[i]; - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_add_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h deleted file mode 100644 index 58e2a151c..000000000 --- a/volk/lib/qa_32f_add_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H -#define INCLUDED_QA_32F_ADD_ALIGNED16_H - -#include -#include - -class qa_32f_add_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_add_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc deleted file mode 100644 index 5d6987333..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[1] __attribute__ ((aligned (16))); - float output01[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_calc_spectral_noise_floor_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h deleted file mode 100644 index c5dce2c4b..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H -#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H - -#include -#include - -class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc deleted file mode 100644 index 3e2452e68..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %d, sse... %d sse2... 
%d\n", i, output_generic[i], output_sse[i], output_sse2[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h deleted file mode 100644 index fce1eb417..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H - -#include -#include - -class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc deleted file mode 100644 index e016b7ff7..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h deleted file mode 100644 index 492bc80e6..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc deleted file mode 100644 index abceb52fb..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_32s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h deleted file mode 100644 index 97d854463..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H - -#include -#include - -class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc deleted file mode 100644 index 90f84b56f..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_32s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h deleted file mode 100644 index 5d662d86d..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc deleted file mode 100644 index 1d0754ac9..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h deleted file mode 100644 index 41eb3e094..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H - -#include -#include - -class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc deleted file mode 100644 index 6f7d5066d..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_64f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h deleted file mode 100644 index 4b144f033..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc deleted file mode 100644 index 6a53629b5..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h deleted file mode 100644 index 68a523f34..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H - -#include -#include - -class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc deleted file mode 100644 index fbc5c20e6..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h deleted file mode 100644 index 88d4ff42a..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc deleted file mode 100644 index f2a1b9e7f..000000000 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . 
- */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_divide_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output1[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - output_known[i] = input0[i] / input1[i]; - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]); - } -} - -#else - -void qa_32f_divide_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h deleted file mode 100644 index 79d5ae4b8..000000000 --- a/volk/lib/qa_32f_divide_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H -#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H - -#include -#include - -class qa_32f_divide_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc deleted file mode 100644 index 98c1f2d99..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.cc +++ /dev/null @@ -1,183 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_aligned16::t1() { - const int vlen = 2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 
16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... 
sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h deleted file mode 100644 index 6931a9e98..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H - -#include -#include - -class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc deleted file mode 100644 index 8e97d4249..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.cc +++ /dev/null @@ -1,190 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * 
taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_unaligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - input = &input[1]; // Make sure the buffer is unaligned - taps = &taps[1]; // Make sure the buffer is unaligned - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(&input[-1]); - free(&taps[-1]); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h deleted file mode 100644 index e8bad07fe..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H - -#include -#include - -class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc deleted file mode 100644 index a7ae60780..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32f_interleave_16sc_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_16sc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex output_generic[vlen] __attribute__ ((aligned (16))); - std::complex output_sse[vlen] __attribute__ ((aligned (16))); - std::complex output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_interleave_16sc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h deleted file mode 100644 index 8d2914817..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H - -#include -#include - -class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc deleted file mode 100644 index 333b6fce8..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_interleave_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_32fc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex output_generic[vlen] __attribute__ ((aligned (16))); - std::complex output_sse[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_interleave_32fc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h deleted file mode 100644 index cba518d37..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H - -#include -#include - -class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc deleted file mode 100644 index 98f8ce9bc..000000000 --- a/volk/lib/qa_32f_max_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_max_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h deleted file mode 100644 index d535479f4..000000000 --- a/volk/lib/qa_32f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H -#define INCLUDED_QA_32F_MAX_ALIGNED16_H - -#include -#include - -class qa_32f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc deleted file mode 100644 index 798b47c53..000000000 --- a/volk/lib/qa_32f_min_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_min_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h deleted file mode 100644 index 90961ac92..000000000 --- a/volk/lib/qa_32f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H -#define INCLUDED_QA_32F_MIN_ALIGNED16_H - -#include -#include - -class qa_32f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc deleted file mode 100644 index aa17cd62e..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . 
- */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_multiply_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - output_known[i] = input0[i] * input1[i]; - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_multiply_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h deleted file mode 100644 index 7032a2ad4..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H - -#include -#include - -class qa_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc deleted file mode 100644 index 0da43ecff..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_normalize_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_normalize_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 320001; - const int ITERS = 100; - - float* output0; - float* output01; - float* output02; - ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); - ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); - ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float)); - - for(int i = 0; i < vlen; ++i) { - output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(float)); - memcpy(output02, output0, vlen*sizeof(float)); - printf("32f_normalize_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - // printf("%e...%e\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4); - } - - free(output0); - free(output01); - free(output02); -} - -#endif diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h deleted file mode 100644 index 7c421eb82..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H -#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H - -#include -#include - -class qa_32f_normalize_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc deleted file mode 100644 index 1b331daeb..000000000 --- a/volk/lib/qa_32f_power_aligned16.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32f_power_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float* input; - int i; - - float* result_generic; - float* result_sse; - float* result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, vlen * 
sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float)); - - random_floats((float*)input, vlen); - - const float power = 3; - - printf("32f_power_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - - for(i = 0; i < vlen; i++){ - //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32f_power_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h deleted file mode 100644 index d45df4e56..000000000 --- a/volk/lib/qa_32f_power_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H -#define INCLUDED_QA_32F_POWER_ALIGNED16_H - -#include -#include - -class qa_32f_power_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_power_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc deleted file mode 100644 index c216ce5d5..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . - */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_sqrt_aligned16::t1() { - printf("sse not available... 
no test performed\n"); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast(RAND_MAX)); - output_known[i] = sqrt(input0[i]); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f\n", input0[i]); - printf("generic... %f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4); - } -} - -#else - -void qa_32f_sqrt_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. 
- for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast(RAND_MAX)); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h deleted file mode 100644 index e4b99d981..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H -#define INCLUDED_QA_32F_SQRT_ALIGNED16_H - -#include -#include - -class qa_32f_sqrt_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc deleted file mode 100644 index 5934d70df..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include 
-#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_stddev_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32f_stddev_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean = 0; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - mean += input0[i]; - } - mean /= static_cast(vlen); - - printf("32f_stddev_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h deleted file mode 100644 index 7f8d7a5fc..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H - -#include -#include - -class qa_32f_stddev_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc deleted file mode 100644 index 78c701d78..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_stddev_and_mean_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_stddev_and_mean_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean_generic; - float mean_sse; - float mean_sse4_1; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_stddev_and_mean_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h deleted file mode 100644 index e08bd249a..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H - -#include -#include - -class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc deleted file mode 100644 index 1e2210203..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_subtract_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_subtract_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_subtract_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h deleted file mode 100644 index 97c14f129..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H -#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H - -#include -#include - -class qa_32f_subtract_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc deleted file mode 100644 index 494776357..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc +++ /dev/null @@ -1,142 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define SNR 30.0 -#define CENTER -4.0 -#define CUTOFF -5.595 -#define ERR_DELTA (1e-4) -#define NUM_ITERS 100000 -#define VEC_LEN 64 -static float uniform() { - return ((float) rand() / RAND_MAX); // uniformly (0, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * -SNR/2.0; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32f_sum_of_poly_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32f_sum_of_poly_aligned16::t1(){ - int i = 0; - - volk_environment_init(); - int ret; - - const int vlen = VEC_LEN; - float cutoff = CUTOFF; - - float* center_point_array; - float* target; - float* target_generic; - float* src0 ; - - - ret = posix_memalign((void**)¢er_point_array, 16, 24); - ret = posix_memalign((void**)&target, 16, 4); - ret = posix_memalign((void**)&target_generic, 16, 4); - ret = posix_memalign((void**)&src0, 16, (vlen << 2)); - - - random_floats((float*)src0, vlen); - - float a = (float)CENTER; - float etoa = expf(a); - center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 + - (-4.0 * a * a * a)/24.0 + - (3.0 * a * a)/6.0 + - (-2.0 * a)/2.0 + - (1.0)) * etoa; - center_point_array[1] = (//(-10.0 * a * a * a)/120.0 + - (6.0 * a * a)/24.0 + - (-3.0 * a)/6.0 + - (1.0/2.0)) * etoa; - center_point_array[2] = (//(10.0 * a * a)/120.0 + - (-4.0 * a)/24.0 + - (1.0/6.0)) * etoa; - center_point_array[3] = (//(-5.0 * a)/120.0 + - (1.0/24.0)) * etoa; - //center_point_array[4] = ((1.0)/120.0) * etoa; - center_point_array[4] = (//(a * a * a * a * a)/120.0 + - (a * a * a * a)/24.0 + - (a * a * a)/-6.0 + - (a * a)/2.0 + - -a + 1.0) * etoa; - - printf("32f_sum_of_poly_aligned16\n"); - - clock_t start, end; - double total; - - float my_sum = 0.0; - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - float sum = 0.0; - for(int l = 0; l < vlen; ++l) { - - sum += expf(src0[l]); - - } - my_sum = sum; - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("exp time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - - volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic"); - - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32f_sum_of_poly_aligned16_manual(target, src0, 
center_point_array, &cutoff, vlen << 2, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 approx time: %f\n", total); - - - - printf("exp: %f, sse3: %f\n", my_sum, target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA); - - - free(center_point_array); - free(target); - free(target_generic); - free(src0); - - -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h deleted file mode 100644 index 67a347f9a..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H -#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H - -#include -#include - -class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc deleted file mode 100644 index b80e0e008..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#define TOLERANCE (1e-4) - -void qa_32fc_32f_multiply_aligned16(void) { - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - float * taps; - int i; - std::vector archs; - archs.push_back("generic"); -#ifdef LV_HAVE_SSE3 - archs.push_back("sse3"); -#endif -#ifdef LV_HAVE_ORC - archs.push_back("orc"); -#endif - - std::vector* > results; - - ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - - for(i=0; i < archs.size(); i++) { - std::complex 
*ptr; - ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float)); - if(ret) { - printf("Couldn't allocate memory\n"); - exit(1); - } - results.push_back(ptr); - } - - random_floats((float*)input, vlen * 2); - random_floats(taps, vlen); - - printf("32fc_32f_multiply_aligned16\n"); - - for(i=0; i < archs.size(); i++) { - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str()); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("%s_time: %f\n", archs[i].c_str(), total); - } - - for(i=0; i < vlen; i++) { - int j = 1; - for(j; j < archs.size(); j++) { - assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA); - } - } - - free(input); - free(taps); - for(i=0; i < archs.size(); i++) { - free(results[i]); - } -} diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h deleted file mode 100644 index fc3b3eeb2..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H - -#include -#include - -class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc deleted file mode 100644 index 64ea65da9..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), 
std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1.5e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32fc_32f_power_32fc_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - int i; - - std::complex* result_generic; - std::complex* result_sse; - - ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float)); - - random_floats((float*)input, vlen * 2); - - const float power = 3.2; - - printf("32fc_32f_power_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32fc_32f_power_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h deleted file mode 100644 index 464b7b7cc..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H - -#include -#include - -class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc deleted file mode 100644 index c55ab5aa0..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_atan2_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_atan2_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_atan2_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h deleted file mode 100644 index 9c4dc209a..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc deleted file mode 100644 index 2f9a30395..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include -#include -#include -#include -#include - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse - -#if LV_HAVE_SSE && LV_HAVE_64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - 
std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#elif LV_HAVE_SSE && LV_HAVE_32 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - 
volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#else - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h deleted file mode 100644 index 507b1769b..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H - -#include -#include - -class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 72e084c05..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h deleted file mode 100644 index 78660e6ad..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc deleted file mode 100644 index 89770c236..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_generic1[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - double output_sse21[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h deleted file mode 100644 index f924b9752..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 7472476f7..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 68b80f27d..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 5cbdc49b3..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 765450bb6..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc deleted file mode 100644 index 4147e30ae..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h deleted file mode 100644 index 3e55fb812..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc deleted file mode 100644 index bcf9ea954..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.cc +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include -#include -#include -#include - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - - - -#if LV_HAVE_SSE3 -void qa_32fc_dot_prod_aligned16::t1() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* 
result_generic; - std::complex* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex(0,0); - result_sse3[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_32 -void qa_32fc_dot_prod_aligned16::t2() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex(0,0); - result_sse3[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_32_time: %f\n", total); - - printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t2() { - printf("sse_32 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_64 - -void qa_32fc_dot_prod_aligned16::t3() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex(0,0); - result_sse3[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_64_time: %f\n", total); - - printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t3() { - printf("sse_64 not available... 
no test performed\n"); -} - - - -#endif diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h deleted file mode 100644 index 4b360db27..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H - -#include -#include - -class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); - void t2 (); - void t3 (); -}; - - -#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc deleted file mode 100644 index c718b6b71..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_orc[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - // printf("generic... %i, sse3... %i, orc... 
%i\n", output_generic[i], output_sse3[i], output_orc[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h deleted file mode 100644 index ffdf1dd9e..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H - -#include -#include - -class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc deleted file mode 100644 index 1d475fb86..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h deleted file mode 100644 index a2881308c..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc deleted file mode 100644 index 022b58ad6..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include -#include - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 -void qa_32fc_multiply_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 100000; 
- - int i; - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse3; - std::complex* result_orc; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float)); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_multiply_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); - assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - free(result_orc); - -} -#else -void qa_32fc_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h deleted file mode 100644 index c8abaa8fe..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H - -#include -#include - -class qa_32fc_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc deleted file mode 100644 index 1444c78a9..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse3 - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - const float scalar = vlen; - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - - printf("32fc_power_spectrum_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse33... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); - } -} - -#endif diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h deleted file mode 100644 index d991223f3..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc deleted file mode 100644 index d9ead8495..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) -#define NUM_ITERS 10000000 -#define VEC_LEN 64 -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex* src0 ; - std::complex* points; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - - printf("32fc_square_dist_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(; i < vlen; ++i) { - //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h deleted file mode 100644 index 9d365d8b0..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H -#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H - -#include -#include - -class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - 
private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc deleted file mode 100644 index f923d1d5c..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA .0001 -#define NUM_ITERS 10000000 -#define VEC_LEN 64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - printf("sse3 not available... no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex* src0 ; - std::complex* points; - float scalar; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - random_floats(&scalar, 1); - - printf("32fc_square_dist_scalar_mult_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen 
<< 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(i = 0; i < vlen; ++i) { - printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h deleted file mode 100644 index ac4e3c45b..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H -#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H - -#include -#include - -class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc deleted file mode 100644 index d20682147..000000000 --- a/volk/lib/qa_32s_and_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_and_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32s_and_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - int32_t output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_and_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h deleted file mode 100644 index dfcb47c63..000000000 --- a/volk/lib/qa_32s_and_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H -#define INCLUDED_QA_32S_AND_ALIGNED16_H - -#include -#include - -class qa_32s_and_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_and_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc deleted file mode 100644 index 07d799809..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h deleted file mode 100644 index efd2a2eea..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc deleted file mode 100644 index 2ec610ffb..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h deleted file mode 100644 index 5006f5fd8..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc deleted file mode 100644 index bebf779b0..000000000 --- a/volk/lib/qa_32s_or_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_or_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32s_or_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - int32_t output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_or_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h deleted file mode 100644 index 9e949eb52..000000000 --- a/volk/lib/qa_32s_or_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H -#define INCLUDED_QA_32S_OR_ALIGNED16_H - -#include -#include - -class qa_32s_or_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_or_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */ diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc deleted file mode 100644 index 313c786b6..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint32_t output0[vlen] __attribute__ ((aligned (16))); - uint32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint32_t)); - printf("32u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h deleted file mode 100644 index 47bad4c3d..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H - -#include -#include - -class qa_32u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc deleted file mode 100644 index 7f9c4584a..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h deleted file mode 100644 index 95d79f73d..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc deleted file mode 100644 index 98aadbf4d..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h deleted file mode 100644 index 430327e81..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc deleted file mode 100644 index 76e755514..000000000 --- a/volk/lib/qa_64f_max_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_max_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h deleted file mode 100644 index 7cbd4d4c1..000000000 --- a/volk/lib/qa_64f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H -#define INCLUDED_QA_64F_MAX_ALIGNED16_H - -#include -#include - -class qa_64f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc deleted file mode 100644 index 4b70d2881..000000000 --- a/volk/lib/qa_64f_min_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_min_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h deleted file mode 100644 index a0e95395f..000000000 --- a/volk/lib/qa_64f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H -#define INCLUDED_QA_64F_MIN_ALIGNED16_H - -#include -#include - -class qa_64f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc deleted file mode 100644 index 20d012c9e..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint64_t output0[vlen] __attribute__ ((aligned (16))); - uint64_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint64_t)); - printf("64u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h deleted file mode 100644 index a4fa0c983..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H - -#include -#include - -class qa_64u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc deleted file mode 100644 index 8dd5f76ca..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h deleted file mode 100644 index 38739fc96..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H - -#include -#include - -class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc deleted file mode 100644 index 12c502d4b..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_unaligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h deleted file mode 100644 index d39fffc35..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H - -#include -#include - -class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc deleted file mode 100644 index f27e60552..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h deleted file mode 100644 index 7f8401d42..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc deleted file mode 100644 index 43468b1b1..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_unaligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen+1] __attribute__ ((aligned (16))); - - float output_generic[vlen+1] __attribute__ ((aligned (16))); - float output_sse4_1[vlen+1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%e...%e\n", output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h deleted file mode 100644 index aad2f8c22..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index f753e1107..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_16s_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_11[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse4_11[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 9c99fed70..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 29073eed7..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - float output_sse14_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max((output_generic[i])*1e-4, 1e-4)); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max((output_generic[i])*1e-4, 1e-4)); - } -} - - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h deleted file mode 100644 index 63b5fdadb..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 4980c982a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - 
printf("sse4_1 not available... no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 02050926f..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 3c3f737a1..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,139 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex *input0; - - float* output_generic; - float* output_sse; - float* output_sse4_1; - - ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t)); - ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float)); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0); - } - - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", 
input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } - - free(input0); - free(output_generic); - free(output_sse); - free(output_sse4_1); -} - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 93338e488..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index a33d1bf30..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 92fc0dd4a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc deleted file mode 100644 index 216bf1cef..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t)); - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_16sc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex*)result_generic, (std::complex*)input, (std::complex*)taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex*)result_sse4_1, (std::complex*)input, (std::complex*)taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), 
std::imag(result_sse4_1[i])); - - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h deleted file mode 100644 index 0e78a5eca..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H - -#include -#include - -class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc deleted file mode 100644 index 4c707446e..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float)); - - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex*)input, (const std::complex*)taps, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex*)input, (const std::complex*)taps, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), 
std::imag(result_sse4_1[i])); - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h deleted file mode 100644 index eb9ae309c..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H - -#include -#include - -class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc deleted file mode 100644 index 8e7e59768..000000000 --- a/volk/lib/qa_volk.cc +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. 
- */ - -/* - * This class gathers together all the test cases for the example - * directory into a single test suite. As you create new test cases, - * add them here. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -CppUnit::TestSuite * -qa_volk::suite() -{ - CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); - - s->addTest(qa_16s_quad_max_star_aligned16::suite()); - s->addTest(qa_32fc_dot_prod_aligned16::suite()); - s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); - s->addTest(qa_32fc_square_dist_aligned16::suite()); - s->addTest(qa_32f_sum_of_poly_aligned16::suite()); - s->addTest(qa_32fc_index_max_aligned16::suite()); - s->addTest(qa_32f_index_max_aligned16::suite()); - s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite()); - s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite()); - s->addTest(qa_16s_branch_4_state_8_aligned16::suite()); - s->addTest(qa_16s_max_star_horizontal_aligned16::suite()); - s->addTest(qa_16s_max_star_aligned16::suite()); - s->addTest(qa_16s_add_quad_aligned16::suite()); - s->addTest(qa_32f_add_aligned16::suite()); - s->addTest(qa_32f_subtract_aligned16::suite()); - 
s->addTest(qa_32f_max_aligned16::suite()); - s->addTest(qa_32f_min_aligned16::suite()); - s->addTest(qa_64f_max_aligned16::suite()); - s->addTest(qa_64f_min_aligned16::suite()); - s->addTest(qa_32s_and_aligned16::suite()); - s->addTest(qa_32s_or_aligned16::suite()); - s->addTest(qa_32f_dot_prod_aligned16::suite()); - s->addTest(qa_32f_dot_prod_unaligned16::suite()); - s->addTest(qa_32f_fm_detect_aligned16::suite()); - //s->addTest(qa_32fc_32f_multiply_aligned16::suite()); - s->addTest(qa_32fc_multiply_aligned16::suite()); - s->addTest(qa_32f_divide_aligned16::suite()); - s->addTest(qa_32f_multiply_aligned16::suite()); - s->addTest(qa_32f_sqrt_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite()); - s->addTest(qa_32u_popcnt_aligned16::suite()); - s->addTest(qa_64u_popcnt_aligned16::suite()); - s->addTest(qa_16u_byteswap_aligned16::suite()); - s->addTest(qa_32u_byteswap_aligned16::suite()); - s->addTest(qa_64u_byteswap_aligned16::suite()); - s->addTest(qa_32f_normalize_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_16s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_64f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite()); - s->addTest(qa_32fc_magnitude_16s_aligned16::suite()); - s->addTest(qa_32fc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32f_interleave_16sc_aligned16::suite()); 
- s->addTest(qa_32f_interleave_32fc_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16s_convert_32f_aligned16::suite()); - s->addTest(qa_16s_convert_32f_unaligned16::suite()); - s->addTest(qa_16s_convert_8s_aligned16::suite()); - s->addTest(qa_16s_convert_8s_unaligned16::suite()); - s->addTest(qa_32f_convert_16s_aligned16::suite()); - s->addTest(qa_32f_convert_16s_unaligned16::suite()); - s->addTest(qa_32f_convert_32s_aligned16::suite()); - s->addTest(qa_32f_convert_32s_unaligned16::suite()); - s->addTest(qa_32f_convert_64f_aligned16::suite()); - s->addTest(qa_32f_convert_64f_unaligned16::suite()); - s->addTest(qa_32f_convert_8s_aligned16::suite()); - s->addTest(qa_32f_convert_8s_unaligned16::suite()); - s->addTest(qa_32s_convert_32f_aligned16::suite()); - s->addTest(qa_32s_convert_32f_unaligned16::suite()); - s->addTest(qa_64f_convert_32f_aligned16::suite()); - s->addTest(qa_64f_convert_32f_unaligned16::suite()); - s->addTest(qa_8s_convert_16s_aligned16::suite()); - s->addTest(qa_8s_convert_16s_unaligned16::suite()); - s->addTest(qa_8s_convert_32f_aligned16::suite()); - s->addTest(qa_8s_convert_32f_unaligned16::suite()); - s->addTest(qa_32fc_32f_power_32fc_aligned16::suite()); - s->addTest(qa_32f_power_aligned16::suite()); - s->addTest(qa_32fc_atan2_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite()); - s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite()); - s->addTest(qa_32f_accumulator_aligned16::suite()); - s->addTest(qa_32f_stddev_aligned16::suite()); - s->addTest(qa_32f_stddev_and_mean_aligned16::suite()); - - return s; -} diff --git a/volk/lib/qa_volk.h 
b/volk/lib/qa_volk.h deleted file mode 100644 index 43fa7faba..000000000 --- a/volk/lib/qa_volk.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU Example Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Example Public License for more details. - * - * You should have received a copy of the GNU Example Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#ifndef INCLUDED_QA_VOLK_H -#define INCLUDED_QA_VOLK_H - -#include - -//! collect all the tests for the example directory - -class qa_volk { - public: - //! return suite of tests for all of example directory - static CppUnit::TestSuite *suite (); -}; - -#endif /* INCLUDED_QA_VOLK_H */ diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc deleted file mode 100644 index 50ac08eab..000000000 --- a/volk/lib/test_all.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2002,2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -int -main (int argc, char **argv) -{ - - int opt = 0; - std::string xmlOutputFile(""); - - while( (opt = getopt(argc, argv, "o:")) != -1){ - switch(opt){ - case 'o': - if(optarg){ - xmlOutputFile.assign(optarg); - } - else{ - std::cerr << "No xml file output specified for -o" << std::endl; - exit(EXIT_FAILURE); - } - break; - - default: /* '?' */ - fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n", - argv[0]); - exit(EXIT_FAILURE); - } - - } - - CppUnit::TextUi::TestRunner runner; - - runner.addTest (qa_volk::suite ()); - - bool was_successful = false; - if(!xmlOutputFile.empty()){ - std::ofstream xmlOutput(xmlOutputFile.c_str()); - if(xmlOutput.is_open()){ - runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput)); - - was_successful = runner.run("", false, true, false); - } - xmlOutput.close(); - } - else{ - was_successful = runner.run ("", false); - } - - return was_successful ? 0 : 1; -} -- cgit From f832c9789be9fec46e211be4fb2355013d19c000 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 21 Jan 2011 18:24:02 -0800 Subject: Volk: Small changes to speed things up. 
--- volk/lib/qa_utils.cc | 2 +- volk/lib/testqa.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 67ce5ddef..9cafd459f 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -32,7 +32,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { if(type.size == 8) random_floats((double *)data, n); else random_floats((float *)data, n); } else { - float int_max = pow(2, type.size*8); + float int_max = float(uint64_t(2) << (type.size*8)); if(type.is_signed) int_max /= 2.0; for(int i=0; i((RAND_MAX/2))) * int_max; diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 9f4934dc0..4cef7b443 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); -- cgit From b0a23e876fe0f92afb2c55fd4fbce6427e9598d8 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 25 Jan 2011 15:06:23 -0800 Subject: Volk: doesn't test a routine if no valid architectures other than generic are found --- volk/lib/qa_utils.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 9cafd459f..6a6f87d85 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -288,6 +288,11 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //first let's get a list of available 
architectures for the test std::vector arch_list = get_arch_list(archs); + if(arch_list.size() < 2) { + std::cout << "no architectures to test" << std::endl; + return false; + } + //now we have to get a function signature by parsing the name std::vector inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); -- cgit From e979880d446949b2d2a93087011579c383369819 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 13 Jan 2011 18:57:48 +0000 Subject: Volk: QA util has proper free(). --- volk/lib/qa_utils.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 6a6f87d85..e85e2c1bc 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -309,10 +309,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //for(int i=0; i inbuffs; - + std::vector free_buffs; //this is just a list of void*'s that i'll have to free later. + //we need it because we dupe void*s in test_data below. make_buffer_for_signature(inbuffs, inputsig, vlen); for(int i=0; i arch_buffs; for(int j=0; j. 
+ */ + +/* %ecx */ +#define bit_SSE3 (1 << 0) +#define bit_PCLMUL (1 << 1) +#define bit_SSSE3 (1 << 9) +#define bit_FMA (1 << 12) +#define bit_CMPXCHG16B (1 << 13) +#define bit_SSE4_1 (1 << 19) +#define bit_SSE4_2 (1 << 20) +#define bit_MOVBE (1 << 22) +#define bit_POPCNT (1 << 23) +#define bit_AES (1 << 25) +#define bit_XSAVE (1 << 26) +#define bit_OSXSAVE (1 << 27) +#define bit_AVX (1 << 28) + +/* %edx */ +#define bit_CMPXCHG8B (1 << 8) +#define bit_CMOV (1 << 15) +#define bit_MMX (1 << 23) +#define bit_FXSAVE (1 << 24) +#define bit_SSE (1 << 25) +#define bit_SSE2 (1 << 26) + +/* Extended Features */ +/* %ecx */ +#define bit_LAHF_LM (1 << 0) +#define bit_SSE4a (1 << 6) +#define bit_SSE5 (1 << 11) + +/* %edx */ +#define bit_LM (1 << 29) +#define bit_3DNOWP (1 << 30) +#define bit_3DNOW (1 << 31) + + +#if defined(__i386__) && defined(__PIC__) +/* %ebx may be the PIC register. */ +#if __GNUC__ >= 3 +#define __cpuid(level, a, b, c, d) \ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#else +/* Host GCCs older than 3.0 weren't supporting Intel asm syntax + nor alternatives in i386 code. 
*/ +#define __cpuid(level, a, b, c, d) \ + __asm__ ("xchgl\t%%ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchgl\t%%ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchgl\t%%ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchgl\t%%ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#endif +#else +#define __cpuid(level, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#endif + +/* Return highest supported input value for cpuid instruction. ext can + be either 0x0 or 0x8000000 to return highest supported value for + basic or extended cpuid information. Function returns 0 if cpuid + is not supported or whatever cpuid returns in eax register. If sig + pointer is non-null, then first four bytes of the signature + (as found in ebx register) are returned in location pointed by sig. */ + +static __inline unsigned int +__get_cpuid_max (unsigned int __ext, unsigned int *__sig) +{ + unsigned int __eax, __ebx, __ecx, __edx; + +#ifndef __x86_64__ +#if __GNUC__ >= 3 + /* See if we can use cpuid. On AMD64 we always can. */ + __asm__ ("pushf{l|d}\n\t" + "pushf{l|d}\n\t" + "pop{l}\t%0\n\t" + "mov{l}\t{%0, %1|%1, %0}\n\t" + "xor{l}\t{%2, %0|%0, %2}\n\t" + "push{l}\t%0\n\t" + "popf{l|d}\n\t" + "pushf{l|d}\n\t" + "pop{l}\t%0\n\t" + "popf{l|d}\n\t" + : "=&r" (__eax), "=&r" (__ebx) + : "i" (0x00200000)); +#else +/* Host GCCs older than 3.0 weren't supporting Intel asm syntax + nor alternatives in i386 code. 
*/ + __asm__ ("pushfl\n\t" + "pushfl\n\t" + "popl\t%0\n\t" + "movl\t%0, %1\n\t" + "xorl\t%2, %0\n\t" + "pushl\t%0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl\t%0\n\t" + "popfl\n\t" + : "=&r" (__eax), "=&r" (__ebx) + : "i" (0x00200000)); +#endif + + if (!((__eax ^ __ebx) & 0x00200000)) + return 0; +#endif + + /* Host supports cpuid. Return highest supported cpuid input value. */ + __cpuid (__ext, __eax, __ebx, __ecx, __edx); + + if (__sig) + *__sig = __ebx; + + return __eax; +} + +/* Return cpuid data for requested cpuid level, as found in returned + eax, ebx, ecx and edx registers. The function checks if cpuid is + supported and returns 1 for valid cpuid information or 0 for + unsupported cpuid level. All pointers are required to be non-null. */ + +static __inline int +__get_cpuid (unsigned int __level, + unsigned int *__eax, unsigned int *__ebx, + unsigned int *__ecx, unsigned int *__edx) +{ + unsigned int __ext = __level & 0x80000000; + + if (__get_cpuid_max (__ext, 0) < __level) + return 0; + + __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx); + return 1; +} -- cgit From 108a594c0838ad21f93cba6597d1f66af097b157 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 25 Jan 2011 10:37:49 -0500 Subject: volk: New volk kernel for conjugate dot products with unaligned buffers. Note: need to convert this to new naming standard. 
--- volk/lib/Makefile.am | 2 + volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc | 138 +++++++++++++++++++++++ volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h | 18 +++ volk/lib/qa_volk.cc | 2 + 4 files changed, 160 insertions(+) create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 7a355e86a..beb815e63 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -93,6 +93,7 @@ libvolk_qa_la_SOURCES = \ qa_32fc_index_max_aligned16.cc \ qa_32f_index_max_aligned16.cc \ qa_32fc_conjugate_dot_prod_aligned16.cc \ + qa_32fc_conjugate_dot_prod_unaligned.cc \ qa_16s_permute_and_scalar_add_aligned16.cc \ qa_16s_branch_4_state_8_aligned16.cc \ qa_16s_max_star_horizontal_aligned16.cc \ @@ -195,6 +196,7 @@ noinst_HEADERS = \ qa_32fc_index_max_aligned16.h \ qa_32f_index_max_aligned16.h \ qa_32fc_conjugate_dot_prod_aligned16.h \ + qa_32fc_conjugate_dot_prod_unaligned.h \ qa_16s_permute_and_scalar_add_aligned16.h \ qa_16s_branch_4_state_8_aligned16.h \ qa_16s_max_star_horizontal_aligned16.h \ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc new file mode 100644 index 000000000..a0680bab6 --- /dev/null +++ b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +//test for sse + +#if LV_HAVE_SSE && LV_HAVE_64 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i 
< n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_conjugate_dot_prod_unaligned::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse"); + + printf("32fc_conjugate_dot_prod_unaligned\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#elif LV_HAVE_SSE && LV_HAVE_32 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_conjugate_dot_prod_unaligned::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + 
random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse_32"); + + printf("32fc_conjugate_dot_prod_unaligned\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#else + +void qa_32fc_conjugate_dot_prod_unaligned::t1() { + printf("sse not available... no test performed\n"); +} + +#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h new file mode 100644 index 000000000..7aead53a1 --- /dev/null +++ b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H +#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H + +#include +#include + +class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */ diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index c3c27b69b..98d3e9728 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ qa_volk::suite() s->addTest(qa_32fc_index_max_aligned16::suite()); s->addTest(qa_32f_index_max_aligned16::suite()); s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite()); + s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite()); s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite()); 
s->addTest(qa_16s_branch_4_state_8_aligned16::suite()); s->addTest(qa_16s_max_star_horizontal_aligned16::suite()); -- cgit From 023167ca8a85ab597f9e59302733f71809a8afbd Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 25 Jan 2011 21:36:01 -0500 Subject: volk: Adding explicit links to local volk libraries. Required to prevent breakage when adding new volk kernels. --- volk/lib/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index beb815e63..446ff574f 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -285,7 +285,7 @@ noinst_PROGRAMS = \ test_all test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk_qa.la +test_all_LDADD = libvolk.la libvolk_runtime.la libvolk_qa.la distclean-local: -- cgit From 2a4c4f89187bf75caa34c7bc52fc32310a75c9f2 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 26 Jan 2011 15:28:35 -0800 Subject: Volk: fixed volk_8i_s32f_convert_32f_a16_orc_impl. 
--- volk/lib/testqa.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 4cef7b443..d6b9e347d 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -49,8 +49,8 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); // VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); @@ -60,7 +60,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); // VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); -- cgit From 5ebd9ef2580aa36cd3a636c6257bd4b80b2380f8 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 26 Jan 2011 15:44:40 -0800 Subject: Volk: find built headers instead of installed ones --- volk/lib/Makefile.am | 2 +- volk/lib/testqa.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am 
index afd29a352..6f3d7fd86 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -120,7 +120,7 @@ noinst_PROGRAMS = \ testqa testqa_SOURCES = testqa.cc qa_utils.cc -testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN +testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB) if LV_HAVE_ORC testqa_LDADD = \ diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index d6b9e347d..e9734411b 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -1,6 +1,6 @@ #include "qa_utils.h" -#include "../include/volk/volk.h" -#include "../include/volk/volk_registry.h" +#include +#include #include BOOST_AUTO_TEST_CASE(volk_test_all) { -- cgit From e34a484084a5224ec3412bd7d6c6f285301f5d43 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 26 Jan 2011 15:47:56 -0800 Subject: Volk: renamed volk_32fc_32f_power_32fc_a16 to volk_32fc_s32f_power_32fc_a16 --- volk/lib/testqa.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index e9734411b..f33670856 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -29,7 +29,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); -- cgit From 6503e3b21978b71908400c994148836bec4a97b9 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 30 Jan 2011 12:35:07 -0500 Subject: volk: Updating build structure to work when orc 
is not installed. Distcheck passes for me if liborc is installed or not. --- volk/lib/Makefile.am | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 6f3d7fd86..af7c7f335 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -45,7 +45,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ # list of programs run by "make check" and "make distcheck" -TESTS = testqa +#TESTS = testqa #orc stuff gets built in the ORC directory conditional to ORC being enabled. #it gets linked in during the build of libvolk as an added library. #there might be a better way to do this. @@ -77,7 +77,7 @@ libvolk_la_SOURCES = \ volk_orc_LDFLAGS = \ $(ORC_LDFLAGS) \ -lorc-0.4 - + volk_orc_LIBADD = \ ../orc/libvolk_orc.la @@ -103,7 +103,6 @@ endif #libvolk_qa_la_LIBADD = \ # libvolk.la \ # libvolk_runtime.la - # ---------------------------------------------------------------- # headers that don't get installed -- cgit From 736874202f15222fa3ec10ceeb1815e8a595ed3a Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 2 Feb 2011 13:55:15 -0500 Subject: volk: cleaning up makefile issues after merge. 
--- volk/lib/Makefile.am | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index af7c7f335..3e5502369 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -57,7 +57,8 @@ lib_LTLIBRARIES = \ EXTRA_DIST = \ volk_mktables.c \ volk_rank_archs.h \ - volk_proccpu_sim.c + volk_proccpu_sim.c \ + gcc_x86_cpuid.h # ---------------------------------------------------------------- # The main library @@ -109,8 +110,7 @@ endif # ---------------------------------------------------------------- noinst_HEADERS = \ volk_init.h \ - qa_utils.h \ - assembly.h + qa_utils.h # ---------------------------------------------------------------- # Our test program -- cgit From b806f6e95cd917e54884841c8e7928204ecd78f8 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 2 Feb 2011 14:21:46 -0500 Subject: volk: updating to readd unaligned dot product under new name scheme. --- volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc | 138 --------------- volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h | 18 -- volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc | 138 +++++++++++++++ volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h | 18 ++ volk/lib/qa_volk.cc | 213 ----------------------- volk/lib/testqa.cc | 1 + 6 files changed, 157 insertions(+), 369 deletions(-) delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h delete mode 100644 volk/lib/qa_volk.cc (limited to 'volk/lib') diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc deleted file mode 100644 index a0680bab6..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include -#include -#include -#include -#include - - -#define 
assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse - -#if LV_HAVE_SSE && LV_HAVE_64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_unaligned::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse"); - - printf("32fc_conjugate_dot_prod_unaligned\n"); - printf("generic: %f +i%f ... 
sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#elif LV_HAVE_SSE && LV_HAVE_32 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_unaligned::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse_32"); - - printf("32fc_conjugate_dot_prod_unaligned\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#else - -void qa_32fc_conjugate_dot_prod_unaligned::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h deleted file mode 100644 index 7aead53a1..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H -#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H - -#include -#include - -class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */ diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc new file mode 100644 index 000000000..fefdf06ee --- /dev/null +++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +//test for sse + +#if LV_HAVE_SSE && LV_HAVE_64 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + 
ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse"); + + printf("32fc_x2_conjugate_dot_prod_32fc_u\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#elif LV_HAVE_SSE && LV_HAVE_32 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32"); + + printf("32fc_x2_conjugate_dot_prod_32fc_u\n"); + printf("generic: %f +i%f ... 
sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#else + +void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { + printf("sse not available... no test performed\n"); +} + +#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h new file mode 100644 index 000000000..f07402403 --- /dev/null +++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H +#define INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H + +#include +#include + +class qa_32fc_x2_conjugate_dot_prod_32fc_u : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_x2_conjugate_dot_prod_32fc_u); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H */ diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc deleted file mode 100644 index 98d3e9728..000000000 --- a/volk/lib/qa_volk.cc +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. 
If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -/* - * This class gathers together all the test cases for the example - * directory into a single test suite. As you create new test cases, - * add them here. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -CppUnit::TestSuite * -qa_volk::suite() -{ - CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); - - s->addTest(qa_16s_quad_max_star_aligned16::suite()); - s->addTest(qa_32fc_dot_prod_aligned16::suite()); - s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); - s->addTest(qa_32fc_square_dist_aligned16::suite()); - s->addTest(qa_32f_sum_of_poly_aligned16::suite()); - s->addTest(qa_32fc_index_max_aligned16::suite()); - s->addTest(qa_32f_index_max_aligned16::suite()); - s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite()); - s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite()); - s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite()); - s->addTest(qa_16s_branch_4_state_8_aligned16::suite()); - s->addTest(qa_16s_max_star_horizontal_aligned16::suite()); - 
s->addTest(qa_16s_max_star_aligned16::suite()); - s->addTest(qa_16s_add_quad_aligned16::suite()); - s->addTest(qa_32f_add_aligned16::suite()); - s->addTest(qa_32f_subtract_aligned16::suite()); - s->addTest(qa_32f_max_aligned16::suite()); - s->addTest(qa_32f_min_aligned16::suite()); - s->addTest(qa_64f_max_aligned16::suite()); - s->addTest(qa_64f_min_aligned16::suite()); - s->addTest(qa_32s_and_aligned16::suite()); - s->addTest(qa_32s_or_aligned16::suite()); - s->addTest(qa_32f_dot_prod_aligned16::suite()); - s->addTest(qa_32f_dot_prod_unaligned16::suite()); - s->addTest(qa_32f_fm_detect_aligned16::suite()); - s->addTest(qa_32fc_32f_multiply_aligned16::suite()); - s->addTest(qa_32fc_multiply_aligned16::suite()); - s->addTest(qa_32f_divide_aligned16::suite()); - s->addTest(qa_32f_multiply_aligned16::suite()); - s->addTest(qa_32f_sqrt_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite()); - s->addTest(qa_32u_popcnt_aligned16::suite()); - s->addTest(qa_64u_popcnt_aligned16::suite()); - s->addTest(qa_16u_byteswap_aligned16::suite()); - s->addTest(qa_32u_byteswap_aligned16::suite()); - s->addTest(qa_64u_byteswap_aligned16::suite()); - s->addTest(qa_32f_normalize_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_16s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_64f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite()); - 
s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite()); - s->addTest(qa_32fc_magnitude_16s_aligned16::suite()); - s->addTest(qa_32fc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32f_interleave_16sc_aligned16::suite()); - s->addTest(qa_32f_interleave_32fc_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16s_convert_32f_aligned16::suite()); - s->addTest(qa_16s_convert_32f_unaligned16::suite()); - s->addTest(qa_16s_convert_8s_aligned16::suite()); - s->addTest(qa_16s_convert_8s_unaligned16::suite()); - s->addTest(qa_32f_convert_16s_aligned16::suite()); - s->addTest(qa_32f_convert_16s_unaligned16::suite()); - s->addTest(qa_32f_convert_32s_aligned16::suite()); - s->addTest(qa_32f_convert_32s_unaligned16::suite()); - s->addTest(qa_32f_convert_64f_aligned16::suite()); - s->addTest(qa_32f_convert_64f_unaligned16::suite()); - s->addTest(qa_32f_convert_8s_aligned16::suite()); - s->addTest(qa_32f_convert_8s_unaligned16::suite()); - s->addTest(qa_32s_convert_32f_aligned16::suite()); - s->addTest(qa_32s_convert_32f_unaligned16::suite()); - s->addTest(qa_64f_convert_32f_aligned16::suite()); - s->addTest(qa_64f_convert_32f_unaligned16::suite()); - s->addTest(qa_8s_convert_16s_aligned16::suite()); - s->addTest(qa_8s_convert_16s_unaligned16::suite()); - s->addTest(qa_8s_convert_32f_aligned16::suite()); - s->addTest(qa_8s_convert_32f_unaligned16::suite()); - s->addTest(qa_32fc_32f_power_32fc_aligned16::suite()); - s->addTest(qa_32f_power_aligned16::suite()); - s->addTest(qa_32fc_atan2_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite()); - 
s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite()); - s->addTest(qa_32f_accumulator_aligned16::suite()); - s->addTest(qa_32f_stddev_aligned16::suite()); - s->addTest(qa_32f_stddev_and_mean_aligned16::suite()); - - return s; -} diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index f33670856..779bc61eb 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); -- cgit From b013372e7e02461bf5e67845b333030eee164bea Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Tue, 8 Mar 2011 16:33:17 -0800 Subject: volk: replace posix_memalign with something cross platform --- volk/lib/qa_utils.cc | 57 ++++++++++++++++++++++++---------------------------- volk/lib/qa_utils.h | 2 +- 2 files changed, 27 insertions(+), 32 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index e85e2c1bc..710d56fb8 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -1,19 +1,20 @@ #include "qa_utils.h" -#include +#include #include #include #include //#include #include #include -#include -#include +#include +#include #include //#include #include #include #include #include +#include float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) @@ -61,22 +62,6 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } } -void *make_aligned_buffer(unsigned int len, unsigned int size) { - void *buf; - int 
ret; - ret = posix_memalign((void**)&buf, 16, len * size); - assert(ret == 0); - memset(buf, 0x00, len*size); - return buf; -} - -void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { - BOOST_FOREACH(volk_type_t sig, inputsig) { - if(!sig.is_scalar) //we don't make buffers for scalars - buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1))); - } -} - static std::vector get_arch_list(const int archs[]) { std::vector archlist; int num_archs = archs[0]; @@ -282,6 +267,18 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { return fail; } +class volk_qa_aligned_mem_pool{ +public: + void *get_new(size_t size, size_t alignment = 16){ + boost::shared_array mem(new char[size + alignment-1]); + size_t ptr = size_t(mem.get() + alignment-1) & ~(alignment-1); + std::memset((void *)ptr, 0x00, size); + _mems.push_back(mem); + return (void *)ptr; + } +private: std::vector > _mems; +}; + bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; @@ -292,7 +289,10 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, std::cout << "no architectures to test" << std::endl; return false; } - + + //something that can hang onto memory and cleanup when this function exits + volk_qa_aligned_mem_pool mem_pool; + //now we have to get a function signature by parsing the name std::vector inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); @@ -309,12 +309,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //for(int i=0; i inbuffs; - std::vector free_buffs; //this is just a list of void*'s that i'll have to free later. - //we need it because we dupe void*s in test_data below. 
- make_buffer_for_signature(inbuffs, inputsig, vlen); + BOOST_FOREACH(volk_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1))); + } for(int i=0; i arch_buffs; for(int j=0; j +#include #include struct volk_type_t { -- cgit From 6673be777cd5395ae867e67db8c95aa09066617a Mon Sep 17 00:00:00 2001 From: Johnathan Corgan Date: Sat, 12 Mar 2011 15:47:40 -0800 Subject: Added/updated ignore files. --- volk/lib/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'volk/lib') diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index 0f17543ab..6a5fde28f 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -20,3 +20,4 @@ /volk_proccpu_sim.c /volk_runtime.c /test_all +/testqa -- cgit From 888beebf6015d9a88dbd1c3c842cf2490899a99b Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 14 Mar 2011 09:33:00 -0700 Subject: volk: simplify the get new method for the aligned pool --- volk/lib/qa_utils.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 710d56fb8..b0f63d2b5 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -6,6 +6,7 @@ //#include #include #include +#include #include #include #include @@ -14,7 +15,6 @@ #include #include #include -#include float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) @@ -270,13 +270,11 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { class volk_qa_aligned_mem_pool{ public: void *get_new(size_t size, size_t alignment = 16){ - boost::shared_array mem(new char[size + alignment-1]); - size_t ptr = size_t(mem.get() + alignment-1) & ~(alignment-1); - std::memset((void *)ptr, 0x00, size); - _mems.push_back(mem); - return (void *)ptr; + _mems.push_back(std::vector(size + alignment-1, 0)); + size_t ptr = size_t(&_mems.back().front()); + return (void *)((ptr + 
alignment-1) & ~(alignment-1)); } -private: std::vector > _mems; +private: std::list > _mems; }; bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { -- cgit