diff options
author | Tom Rondeau | 2011-02-01 23:19:28 -0500 |
---|---|---|
committer | Tom Rondeau | 2011-02-01 23:19:28 -0500 |
commit | 800686701206e438e5a5d645242137f9285c4fa9 (patch) | |
tree | a95ad9bac09e3a6cec43f741cc3eab17771a2fe2 /volk/lib | |
parent | 023167ca8a85ab597f9e59302733f71809a8afbd (diff) | |
parent | 6503e3b21978b71908400c994148836bec4a97b9 (diff) | |
download | gnuradio-800686701206e438e5a5d645242137f9285c4fa9.tar.gz gnuradio-800686701206e438e5a5d645242137f9285c4fa9.tar.bz2 gnuradio-800686701206e438e5a5d645242137f9285c4fa9.zip |
Merge branch 'volk_rename' into next
Conflicts:
volk/include/volk/Makefile.am
volk/lib/Makefile.am
volk/lib/qa_volk.cc
Diffstat (limited to 'volk/lib')
160 files changed, 624 insertions, 7923 deletions
diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 446ff574f..af7c7f335 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -24,18 +24,19 @@ include $(top_srcdir)/Makefile.common # of a hack. Figure out the right way to do this to find built # volk_config.h and volk_tables.h -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ -I$(top_builddir)/include \ $(LV_CXXFLAGS) $(WITH_INCLUDES) -# We build 2 libraries and 1 executable here. One library contains -# everything except the libcppunit QA code, and one contains only the -# libcppunit-based QA code. The C++ QA code is especially recommended +# We build 1 library and 1 executable here. The library contains +# everything except the QA code. The C++ QA code is especially recommended # when you have general purpose C or C++ code that may not get # thoroughly exercised by building and running a GR block. The # executable runs the QA code at "make check" time. # +# +# # N.B., If there's a SWIG generated shared library and associated # python code, it will be contained in ../python, not here. (That # code is conditionally built depending on the state of the @@ -44,13 +45,14 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ # list of programs run by "make check" and "make distcheck" -#TESTS = test_all - +#TESTS = testqa +#orc stuff gets built in the ORC directory conditional to ORC being enabled. +#it gets linked in during the build of libvolk as an added library. +#there might be a better way to do this. 
lib_LTLIBRARIES = \ libvolk.la \ - libvolk_runtime.la \ - libvolk_qa.la + libvolk_runtime.la EXTRA_DIST = \ volk_mktables.c \ @@ -72,221 +74,63 @@ libvolk_la_SOURCES = \ volk.c \ volk_environment_init.c +volk_orc_LDFLAGS = \ + $(ORC_LDFLAGS) \ + -lorc-0.4 -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +volk_orc_LIBADD = \ + ../orc/libvolk_orc.la +if LV_HAVE_ORC +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_la_LIBADD = $(volk_orc_LIBADD) +else +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_la_LIBADD = - +endif # ---------------------------------------------------------------- # The QA library. Note libvolk.la in LIBADD # ---------------------------------------------------------------- -libvolk_qa_la_SOURCES = \ - qa_volk.cc \ - qa_16s_quad_max_star_aligned16.cc \ - qa_32fc_dot_prod_aligned16.cc \ - qa_32fc_square_dist_aligned16.cc \ - qa_32fc_square_dist_scalar_mult_aligned16.cc \ - qa_32f_sum_of_poly_aligned16.cc \ - qa_32fc_index_max_aligned16.cc \ - qa_32f_index_max_aligned16.cc \ - qa_32fc_conjugate_dot_prod_aligned16.cc \ - qa_32fc_conjugate_dot_prod_unaligned.cc \ - qa_16s_permute_and_scalar_add_aligned16.cc \ - qa_16s_branch_4_state_8_aligned16.cc \ - qa_16s_max_star_horizontal_aligned16.cc \ - qa_16s_max_star_aligned16.cc \ - qa_16s_add_quad_aligned16.cc \ - qa_32f_add_aligned16.cc \ - qa_32f_subtract_aligned16.cc \ - qa_32f_max_aligned16.cc \ - qa_32f_min_aligned16.cc \ - qa_64f_max_aligned16.cc \ - qa_64f_min_aligned16.cc \ - qa_32s_and_aligned16.cc \ - qa_32s_or_aligned16.cc \ - qa_32f_dot_prod_aligned16.cc \ - qa_32f_dot_prod_unaligned16.cc \ - qa_32f_fm_detect_aligned16.cc \ - qa_32fc_32f_multiply_aligned16.cc \ - qa_32fc_multiply_aligned16.cc \ - 
qa_32f_divide_aligned16.cc \ - qa_32f_multiply_aligned16.cc \ - qa_32f_sqrt_aligned16.cc \ - qa_8sc_multiply_conjugate_16sc_aligned16.cc \ - qa_8sc_multiply_conjugate_32fc_aligned16.cc \ - qa_32u_popcnt_aligned16.cc \ - qa_64u_popcnt_aligned16.cc \ - qa_64u_byteswap_aligned16.cc \ - qa_8sc_deinterleave_32f_aligned16.cc \ - qa_16sc_deinterleave_32f_aligned16.cc \ - qa_8sc_deinterleave_16s_aligned16.cc \ - qa_32f_interleave_32fc_aligned16.cc \ - qa_16u_byteswap_aligned16.cc \ - qa_16sc_deinterleave_16s_aligned16.cc \ - qa_32fc_deinterleave_real_32f_aligned16.cc \ - qa_32fc_magnitude_32f_aligned16.cc \ - qa_32fc_deinterleave_real_64f_aligned16.cc \ - qa_32fc_deinterleave_real_16s_aligned16.cc \ - qa_32fc_magnitude_16s_aligned16.cc \ - qa_32fc_deinterleave_32f_aligned16.cc \ - qa_8sc_deinterleave_real_8s_aligned16.cc \ - qa_32fc_deinterleave_64f_aligned16.cc \ - qa_32f_interleave_16sc_aligned16.cc \ - qa_16sc_deinterleave_real_8s_aligned16.cc \ - qa_16sc_deinterleave_real_32f_aligned16.cc \ - qa_16sc_magnitude_32f_aligned16.cc \ - qa_32u_byteswap_aligned16.cc \ - qa_16sc_deinterleave_real_16s_aligned16.cc \ - qa_8sc_deinterleave_real_32f_aligned16.cc \ - qa_16sc_magnitude_16s_aligned16.cc \ - qa_32f_normalize_aligned16.cc \ - qa_8sc_deinterleave_real_16s_aligned16.cc \ - qa_16s_convert_32f_aligned16.cc \ - qa_16s_convert_32f_unaligned16.cc \ - qa_16s_convert_8s_aligned16.cc \ - qa_16s_convert_8s_unaligned16.cc \ - qa_32f_convert_16s_aligned16.cc \ - qa_32f_convert_16s_unaligned16.cc \ - qa_32f_convert_32s_aligned16.cc \ - qa_32f_convert_32s_unaligned16.cc \ - qa_32f_convert_64f_aligned16.cc \ - qa_32f_convert_64f_unaligned16.cc \ - qa_32f_convert_8s_aligned16.cc \ - qa_32f_convert_8s_unaligned16.cc \ - qa_32s_convert_32f_aligned16.cc \ - qa_32s_convert_32f_unaligned16.cc \ - qa_64f_convert_32f_aligned16.cc \ - qa_64f_convert_32f_unaligned16.cc \ - qa_8s_convert_16s_aligned16.cc \ - qa_8s_convert_16s_unaligned16.cc \ - qa_8s_convert_32f_aligned16.cc \ - 
qa_8s_convert_32f_unaligned16.cc \ - qa_32fc_32f_power_32fc_aligned16.cc \ - qa_32f_power_aligned16.cc \ - qa_32fc_atan2_32f_aligned16.cc \ - qa_32fc_power_spectral_density_32f_aligned16.cc \ - qa_32fc_power_spectrum_32f_aligned16.cc \ - qa_32f_calc_spectral_noise_floor_aligned16.cc \ - qa_32f_accumulator_aligned16.cc \ - qa_32f_stddev_aligned16.cc \ - qa_32f_stddev_and_mean_aligned16.cc +#libvolk_qa_la_SOURCES = \ +# qa_utils.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost -libvolk_qa_la_LIBADD = \ - libvolk.la \ - libvolk_runtime.la \ - $(CPPUNIT_LIBS) +#libvolk_qa_la_LIBADD = \ +# libvolk.la \ +# libvolk_runtime.la # ---------------------------------------------------------------- # headers that don't get installed # ---------------------------------------------------------------- noinst_HEADERS = \ volk_init.h \ - qa_volk.h \ - gcc_x86_cpuid.h \ - qa_16s_quad_max_star_aligned16.h \ - qa_32fc_dot_prod_aligned16.h \ - qa_32fc_square_dist_aligned16.h \ - qa_32fc_square_dist_scalar_mult_aligned16.h \ - qa_32f_sum_of_poly_aligned16.h \ - qa_32fc_index_max_aligned16.h \ - qa_32f_index_max_aligned16.h \ - qa_32fc_conjugate_dot_prod_aligned16.h \ - qa_32fc_conjugate_dot_prod_unaligned.h \ - qa_16s_permute_and_scalar_add_aligned16.h \ - qa_16s_branch_4_state_8_aligned16.h \ - qa_16s_max_star_horizontal_aligned16.h \ - qa_16s_max_star_aligned16.h \ - qa_16s_add_quad_aligned16.h \ - qa_32f_add_aligned16.h \ - qa_32f_subtract_aligned16.h \ - qa_32f_max_aligned16.h \ - qa_32f_min_aligned16.h \ - qa_64f_max_aligned16.h \ - qa_64f_min_aligned16.h \ - qa_32s_and_aligned16.h \ - qa_32s_or_aligned16.h \ - qa_32f_dot_prod_aligned16.h \ - qa_32f_dot_prod_unaligned16.h \ - qa_32f_fm_detect_aligned16.h \ - qa_32fc_32f_multiply_aligned16.h \ - qa_32fc_multiply_aligned16.h \ - qa_32f_divide_aligned16.h \ - qa_32f_multiply_aligned16.h \ - qa_32f_sqrt_aligned16.h \ - 
qa_8sc_multiply_conjugate_16sc_aligned16.h \ - qa_8sc_multiply_conjugate_32fc_aligned16.h \ - qa_32u_popcnt_aligned16.h \ - qa_64u_popcnt_aligned16.h \ - qa_64u_byteswap_aligned16.h \ - qa_8sc_deinterleave_32f_aligned16.h \ - qa_16sc_deinterleave_32f_aligned16.h \ - qa_8sc_deinterleave_16s_aligned16.h \ - qa_32f_interleave_32fc_aligned16.h \ - qa_16u_byteswap_aligned16.h \ - qa_16sc_deinterleave_16s_aligned16.h \ - qa_32fc_deinterleave_real_32f_aligned16.h \ - qa_32fc_magnitude_32f_aligned16.h \ - qa_32fc_deinterleave_real_64f_aligned16.h \ - qa_32fc_deinterleave_real_16s_aligned16.h \ - qa_32fc_magnitude_16s_aligned16.h \ - qa_32fc_deinterleave_32f_aligned16.h \ - qa_8sc_deinterleave_real_8s_aligned16.h \ - qa_32fc_deinterleave_64f_aligned16.h \ - qa_32f_interleave_16sc_aligned16.h \ - qa_16sc_deinterleave_real_8s_aligned16.h \ - qa_16sc_deinterleave_real_32f_aligned16.h \ - qa_16sc_magnitude_32f_aligned16.h \ - qa_32u_byteswap_aligned16.h \ - qa_16sc_deinterleave_real_16s_aligned16.h \ - qa_8sc_deinterleave_real_32f_aligned16.h \ - qa_16sc_magnitude_16s_aligned16.h \ - qa_32f_normalize_aligned16.h \ - qa_8sc_deinterleave_real_16s_aligned16.h \ - qa_16s_convert_32f_aligned16.h \ - qa_16s_convert_32f_unaligned16.h \ - qa_16s_convert_8s_aligned16.h \ - qa_16s_convert_8s_unaligned16.h \ - qa_32f_convert_16s_aligned16.h \ - qa_32f_convert_16s_unaligned16.h \ - qa_32f_convert_32s_aligned16.h \ - qa_32f_convert_32s_unaligned16.h \ - qa_32f_convert_64f_aligned16.h \ - qa_32f_convert_64f_unaligned16.h \ - qa_32f_convert_8s_aligned16.h \ - qa_32f_convert_8s_unaligned16.h \ - qa_32s_convert_32f_aligned16.h \ - qa_32s_convert_32f_unaligned16.h \ - qa_64f_convert_32f_aligned16.h \ - qa_64f_convert_32f_unaligned16.h \ - qa_8s_convert_16s_aligned16.h \ - qa_8s_convert_16s_unaligned16.h \ - qa_8s_convert_32f_aligned16.h \ - qa_8s_convert_32f_unaligned16.h \ - qa_32fc_32f_power_32fc_aligned16.h \ - qa_32f_power_aligned16.h \ - qa_32fc_atan2_32f_aligned16.h \ - 
qa_32fc_power_spectral_density_32f_aligned16.h \ - qa_32fc_power_spectrum_32f_aligned16.h \ - qa_32f_calc_spectral_noise_floor_aligned16.h \ - qa_32f_accumulator_aligned16.h \ - qa_32f_stddev_aligned16.h \ - qa_32f_stddev_and_mean_aligned16.h - + qa_utils.h \ + assembly.h # ---------------------------------------------------------------- # Our test program # ---------------------------------------------------------------- noinst_PROGRAMS = \ - test_all - -test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk.la libvolk_runtime.la libvolk_qa.la + testqa +testqa_SOURCES = testqa.cc qa_utils.cc +testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) +testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB) +if LV_HAVE_ORC +testqa_LDADD = \ + libvolk.la \ + libvolk_runtime.la \ + ../orc/libvolk_orc.la +else +testqa_LDADD = \ + libvolk.la \ + libvolk_runtime.la +endif distclean-local: rm -f volk.c diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc deleted file mode 100644 index 6215f4a64..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16s_convert_32f_aligned16.h> -#include <volk/volk_16s_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h deleted file mode 100644 index ef813d96f..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc deleted file mode 100644 index 46c2e48ac..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16s_convert_32f_unaligned16.h> -#include <volk/volk_16s_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_unaligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h deleted file mode 100644 index aeb04f770..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc deleted file mode 100644 index 8225aa0cf..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16s_convert_8s_aligned16.h> -#include <volk/volk_16s_convert_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h deleted file mode 100644 index 2e409d0cc..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc deleted file mode 100644 index e6ce5030e..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16s_convert_8s_unaligned16.h> -#include <volk/volk_16s_convert_8s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h deleted file mode 100644 index 4b2fe9e42..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc deleted file mode 100644 index c6f828ba6..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include <volk/volk.h> -#include <qa_16s_max_star_aligned16.h> -#include <volk/volk_16s_max_star_aligned16.h> -#include <cstdlib> -#include <ctime> -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - - -void qa_16s_max_star_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 6400; - const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[1] __attribute__ ((aligned (16))); - - short output1[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h deleted file mode 100644 index 119f87c4d..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_max_star_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc deleted file mode 100644 index 0a58570e2..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16s_max_star_horizontal_aligned16.h> -#include <volk/volk_16s_max_star_horizontal_aligned16.h> -#include <cstdlib> -#include <ctime> -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_horizontal_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - -void qa_16s_max_star_horizontal_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 32; - const int ITERS = 1; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[vlen>>1] __attribute__ ((aligned (16))); - - short output1[vlen>>1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))); - - short minus0 = ((short) (rand() - (RAND_MAX/2))); - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_horizontal_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - /* volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/ - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < (vlen >> 1); ++i) { - // printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - - } - for(int i = 0; i < (vlen >> 1); ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } - } - - -#endif - diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h deleted file mode 100644 index 9f9757253..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index c775e8596..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,77 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_16s_aligned16.h> -#include <volk/volk_16sc_deinterleave_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_sse21[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse21[i]); - - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 995ab5b34..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index b25094e89..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_32f_aligned16.h> -#include <volk/volk_16sc_deinterleave_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16sc_deinterleave_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - float output_sse21[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h deleted file mode 100644 index fea3b6c2d..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index c67064ea6..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_real_16s_aligned16.h> -#include <volk/volk_16sc_deinterleave_real_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - // printf("%d = generic... %d, sse2... %d, ssse3... 
%d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index ebb70b97a..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index f86f03b88..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,124 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16sc_deinterleave_real_32f_aligned16.h> -#include <volk/volk_16sc_deinterleave_real_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE */ - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE4_1 */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index e83426473..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index dd446567e..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_real_8s_aligned16.h> -#include <volk/volk_16sc_deinterleave_real_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 04e5511e5..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc deleted file mode 100644 index 9799ef43b..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_magnitude_16s_aligned16.h> -#include <volk/volk_16sc_magnitude_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h deleted file mode 100644 index 4664b70f4..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc deleted file mode 100644 index 1ebe644c5..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,111 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_magnitude_32f_aligned16.h> -#include <volk/volk_16sc_magnitude_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_32f_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2)); - } - printf("16sc_magnitude_32f_aligned\n"); - - float scale = 32768.0; - for(int i = 0; i < vlen; ++i) { - float re = 
(float)(input0[i].real())/scale; - float im = (float)(input0[i].imag())/scale; - output_known[i] = sqrt(re*re + im*im); - } - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 100; ++i) { - printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag()); - printf("generic... %f == %f\n", output_generic[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); - } -} - -#else - -void qa_16sc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("16sc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count 
= 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h deleted file mode 100644 index 0c25673ea..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc deleted file mode 100644 index ea117a820..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16u_byteswap_aligned16.h> -#include <volk/volk_16u_byteswap_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint16_t output0[vlen] __attribute__ ((aligned (16))); - uint16_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint16_t)); - - printf("16u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h deleted file mode 100644 index e11b23e3f..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc deleted file mode 100644 index 0defef283..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_accumulator_aligned16.h> -#include <volk/volk_32f_accumulator_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_accumulator_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_accumulator_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float accumulator_generic; - float accumulator_sse; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_accumulator_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4); -} - -#endif diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h deleted file mode 100644 index 0004d3ff0..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H -#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_accumulator_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc deleted file mode 100644 index f80d562d4..000000000 --- a/volk/lib/qa_32f_add_aligned16.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. 
- */ - -#include <volk/volk.h> -#include <qa_32f_add_aligned16.h> -#include <volk/volk_32f_add_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_add_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - output_known[i] = input0[i] + input1[i]; - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_add_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h deleted file mode 100644 index 58e2a151c..000000000 --- a/volk/lib/qa_32f_add_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H -#define INCLUDED_QA_32F_ADD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_add_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_add_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc deleted file mode 100644 index 5d6987333..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_calc_spectral_noise_floor_aligned16.h> -#include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h> -#include <cstdlib> -#include <math.h> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[1] __attribute__ ((aligned (16))); - float output01[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_calc_spectral_noise_floor_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h deleted file mode 100644 index c5dce2c4b..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H -#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc deleted file mode 100644 index 3e2452e68..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_16s_aligned16.h> -#include <volk/volk_32f_convert_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %d, sse... %d sse2... 
%d\n", i, output_generic[i], output_sse[i], output_sse2[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h deleted file mode 100644 index fce1eb417..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc deleted file mode 100644 index e016b7ff7..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_16s_unaligned16.h> -#include <volk/volk_32f_convert_16s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h deleted file mode 100644 index 492bc80e6..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc deleted file mode 100644 index abceb52fb..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_32s_aligned16.h> -#include <volk/volk_32f_convert_32s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_32s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h deleted file mode 100644 index 97d854463..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc deleted file mode 100644 index 90f84b56f..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_32s_unaligned16.h> -#include <volk/volk_32f_convert_32s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_32s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h deleted file mode 100644 index 5d662d86d..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc deleted file mode 100644 index 1d0754ac9..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_64f_aligned16.h> -#include <volk/volk_32f_convert_64f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h deleted file mode 100644 index 41eb3e094..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc deleted file mode 100644 index 6f7d5066d..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_64f_unaligned16.h> -#include <volk/volk_32f_convert_64f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_64f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h deleted file mode 100644 index 4b144f033..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc deleted file mode 100644 index 6a53629b5..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_8s_aligned16.h> -#include <volk/volk_32f_convert_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h deleted file mode 100644 index 68a523f34..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc deleted file mode 100644 index fbc5c20e6..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_8s_unaligned16.h> -#include <volk/volk_32f_convert_8s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h deleted file mode 100644 index 88d4ff42a..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc deleted file mode 100644 index 3257a3751..000000000 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. 
- */ - -#include <volk/volk.h> -#include <qa_32f_divide_aligned16.h> -#include <volk/volk_32f_divide_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_divide_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - output_known[i] = input0[i] / input1[i]; - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_divide_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h deleted file mode 100644 index 79d5ae4b8..000000000 --- a/volk/lib/qa_32f_divide_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H -#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_divide_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc deleted file mode 100644 index 98c1f2d99..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.cc +++ /dev/null @@ -1,183 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_dot_prod_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_aligned16::t1() { - const int vlen = 2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = 
posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... 
sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h deleted file mode 100644 index 6931a9e98..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc deleted file mode 100644 index 8e97d4249..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.cc +++ /dev/null @@ -1,190 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_dot_prod_unaligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 
2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_unaligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - input = &input[1]; // Make sure the buffer is unaligned - taps = &taps[1]; // Make sure the buffer is unaligned - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(&input[-1]); - free(&taps[-1]); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h deleted file mode 100644 index e8bad07fe..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc deleted file mode 100644 index a7ae60780..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_interleave_16sc_aligned16.h> -#include <volk/volk_32f_interleave_16sc_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32f_interleave_16sc_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_16sc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex<int16_t> output_generic[vlen] __attribute__ ((aligned (16))); - std::complex<int16_t> output_sse[vlen] __attribute__ ((aligned (16))); - std::complex<int16_t> output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_interleave_16sc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h deleted file mode 100644 index 8d2914817..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc deleted file mode 100644 index 333b6fce8..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_interleave_32fc_aligned16.h> -#include <volk/volk_32f_interleave_32fc_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_interleave_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_32fc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex<float> output_generic[vlen] __attribute__ ((aligned (16))); - std::complex<float> output_sse[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_interleave_32fc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h deleted file mode 100644 index cba518d37..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc deleted file mode 100644 index ceb913cb4..000000000 --- a/volk/lib/qa_32f_max_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_max_aligned16.h> -#include <volk/volk_32f_max_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_max_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h deleted file mode 100644 index d535479f4..000000000 --- a/volk/lib/qa_32f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H -#define INCLUDED_QA_32F_MAX_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc deleted file mode 100644 index 580a60e7d..000000000 --- a/volk/lib/qa_32f_min_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_min_aligned16.h> -#include <volk/volk_32f_min_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_min_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h deleted file mode 100644 index 90961ac92..000000000 --- a/volk/lib/qa_32f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H -#define INCLUDED_QA_32F_MIN_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc deleted file mode 100644 index 0c242b649..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. 
- */ - -#include <volk/volk.h> -#include <qa_32f_multiply_aligned16.h> -#include <volk/volk_32f_multiply_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_multiply_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - output_known[i] = input0[i] * input1[i]; - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_multiply_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h deleted file mode 100644 index 7032a2ad4..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc deleted file mode 100644 index 1c7b485a6..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ /dev/null @@ -1,66 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_normalize_aligned16.h> -#include <volk/volk_32f_normalize_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_normalize_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_normalize_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 320001; - const int ITERS = 100; - - float* output0; - float* output01; - ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); - ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); - - for(int i = 0; i < vlen; ++i) { - output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(float)); - printf("32f_normalize_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - // printf("%e...%e\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } - - free(output0); - free(output01); -} - -#endif diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h deleted file mode 100644 index 7c421eb82..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H -#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_normalize_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc deleted file mode 100644 index 1b331daeb..000000000 --- a/volk/lib/qa_32f_power_aligned16.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_power_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32f_power_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float* input; - int i; - - float* result_generic; - float* result_sse; - float* result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float)); - ret = 
posix_memalign((void**)&result_sse, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float)); - - random_floats((float*)input, vlen); - - const float power = 3; - - printf("32f_power_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - - for(i = 0; i < vlen; i++){ - //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32f_power_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h deleted file mode 100644 index d45df4e56..000000000 --- a/volk/lib/qa_32f_power_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H -#define INCLUDED_QA_32F_POWER_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_power_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_power_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc deleted file mode 100644 index 62d55767a..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. - */ - -#include <volk/volk.h> -#include <qa_32f_sqrt_aligned16.h> -#include <volk/volk_32f_sqrt_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_sqrt_aligned16::t1() { - printf("sse not available... 
no test performed\n"); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX)); - output_known[i] = sqrt(input0[i]); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f\n", input0[i]); - printf("generic... %f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4); - } -} - -#else - -void qa_32f_sqrt_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. 
- for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX)); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h deleted file mode 100644 index e4b99d981..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H -#define INCLUDED_QA_32F_SQRT_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_sqrt_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc deleted file mode 100644 index 5934d70df..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_stddev_aligned16.h> -#include <volk/volk_32f_stddev_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef 
LV_HAVE_SSE - -void qa_32f_stddev_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32f_stddev_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean = 0; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - mean += input0[i]; - } - mean /= static_cast<float>(vlen); - - printf("32f_stddev_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h deleted file mode 100644 index 7f8d7a5fc..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_stddev_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc deleted file mode 100644 index 78c701d78..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_stddev_and_mean_aligned16.h> -#include <volk/volk_32f_stddev_and_mean_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_stddev_and_mean_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_stddev_and_mean_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean_generic; - float mean_sse; - float mean_sse4_1; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_stddev_and_mean_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h deleted file mode 100644 index e08bd249a..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc deleted file mode 100644 index ffe4b504c..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_subtract_aligned16.h> -#include <volk/volk_32f_subtract_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_subtract_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_subtract_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_subtract_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h deleted file mode 100644 index 97c14f129..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H -#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_subtract_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc deleted file mode 100644 index 494776357..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc +++ /dev/null @@ -1,142 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_sum_of_poly_aligned16.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <math.h> - -#define SNR 30.0 -#define CENTER -4.0 -#define CUTOFF -5.595 -#define ERR_DELTA (1e-4) -#define NUM_ITERS 100000 -#define VEC_LEN 64 -static float uniform() { - return ((float) rand() / RAND_MAX); // uniformly (0, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * -SNR/2.0; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32f_sum_of_poly_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32f_sum_of_poly_aligned16::t1(){ - int i = 0; - - volk_environment_init(); - int ret; - - const int vlen = VEC_LEN; - float cutoff = CUTOFF; - - float* center_point_array; - float* target; - float* target_generic; - float* src0 ; - - - ret = posix_memalign((void**)&center_point_array, 16, 24); - ret = posix_memalign((void**)&target, 16, 4); - ret = posix_memalign((void**)&target_generic, 16, 4); - ret = posix_memalign((void**)&src0, 16, (vlen << 2)); - - - random_floats((float*)src0, vlen); - - float a = (float)CENTER; - float etoa = expf(a); - center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 + - (-4.0 * a * a * a)/24.0 + - (3.0 * a * a)/6.0 + - (-2.0 * a)/2.0 + - (1.0)) * etoa; - center_point_array[1] = (//(-10.0 * a * a * a)/120.0 + - (6.0 * a * a)/24.0 + - (-3.0 * a)/6.0 + - (1.0/2.0)) * etoa; - center_point_array[2] = (//(10.0 * a * a)/120.0 + - (-4.0 * a)/24.0 + - (1.0/6.0)) * etoa; - center_point_array[3] = (//(-5.0 * a)/120.0 + - (1.0/24.0)) * etoa; - //center_point_array[4] = ((1.0)/120.0) * etoa; - center_point_array[4] = (//(a * a * a * a * a)/120.0 + - (a * a * a * a)/24.0 + - (a * a * a)/-6.0 + - (a * a)/2.0 + - -a + 1.0) * etoa; - - printf("32f_sum_of_poly_aligned16\n"); - - clock_t start, end; - double total; - - float my_sum = 0.0; - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - float sum = 0.0; - for(int l = 0; l < vlen; ++l) { - - sum += expf(src0[l]); - - } - my_sum = sum; - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("exp time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - - volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic"); - - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32f_sum_of_poly_aligned16_manual(target, src0, 
center_point_array, &cutoff, vlen << 2, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 approx time: %f\n", total); - - - - printf("exp: %f, sse3: %f\n", my_sum, target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA); - - - free(center_point_array); - free(target); - free(target_generic); - free(src0); - - -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h deleted file mode 100644 index 67a347f9a..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H -#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc deleted file mode 100644 index 4eba0a3cd..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ /dev/null @@ -1,85 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_32f_multiply_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 
0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 -void qa_32fc_32f_multiply_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - float * taps; - int i; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float)); - - random_floats((float*)input, vlen * 2); - random_floats(taps, vlen); - - printf("32fc_32f_multiply_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_32f_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h deleted file mode 100644 index fc3b3eeb2..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc deleted file mode 100644 index 64ea65da9..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc +++ /dev/null @@ -1,83 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_32f_power_32fc_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1.5e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32fc_32f_power_32fc_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - int i; - - std::complex<float>* result_generic; - std::complex<float>* result_sse; - - 
ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float)); - - random_floats((float*)input, vlen * 2); - - const float power = 3.2; - - printf("32fc_32f_power_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32fc_32f_power_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h deleted file mode 100644 index 464b7b7cc..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc deleted file mode 100644 index c55ab5aa0..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_atan2_32f_aligned16.h> -#include <volk/volk_32fc_atan2_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_atan2_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_atan2_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_atan2_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h deleted file mode 100644 index 9c4dc209a..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc deleted file mode 100644 index 2f9a30395..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_conjugate_dot_prod_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse - -#if LV_HAVE_SSE && LV_HAVE_64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void 
qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#elif LV_HAVE_SSE && LV_HAVE_32 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result[0] = 
std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#else - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h deleted file mode 100644 index 507b1769b..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 72e084c05..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_32f_aligned16.h> -#include <volk/volk_32fc_deinterleave_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void 
qa_32fc_deinterleave_32f_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h deleted file mode 100644 index 78660e6ad..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc deleted file mode 100644 index 89770c236..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_64f_aligned16.h> -#include <volk/volk_32fc_deinterleave_64f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_generic1[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - double output_sse21[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h deleted file mode 100644 index f924b9752..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 7472476f7..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_real_16s_aligned16.h> -#include <volk/volk_32fc_deinterleave_real_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 68b80f27d..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 5cbdc49b3..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_real_32f_aligned16.h> -#include <volk/volk_32fc_deinterleave_real_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 765450bb6..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc deleted file mode 100644 index 4147e30ae..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_real_64f_aligned16.h> -#include <volk/volk_32fc_deinterleave_real_64f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h deleted file mode 100644 index 3e55fb812..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc deleted file mode 100644 index bcf9ea954..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.cc +++ /dev/null @@ -1,214 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_dot_prod_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> -#include <stdio.h> - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - - - -#if LV_HAVE_SSE3 -void qa_32fc_dot_prod_aligned16::t1() { - - const int vlen = 2046; - - 
volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result_sse3[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_32 -void qa_32fc_dot_prod_aligned16::t2() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result_sse3[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_32_time: %f\n", total); - - printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t2() { - printf("sse_32 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_64 - -void qa_32fc_dot_prod_aligned16::t3() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result_sse3[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_64_time: %f\n", total); - - printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t3() { - printf("sse_64 not available... 
no test performed\n"); -} - - - -#endif diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h deleted file mode 100644 index 4b360db27..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); - void t2 (); - void t3 (); -}; - - -#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc deleted file mode 100644 index 16984e30d..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_magnitude_16s_aligned16.h> -#include <volk/volk_32fc_magnitude_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h deleted file mode 100644 index ffdf1dd9e..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc deleted file mode 100644 index b99f1ddcf..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_magnitude_32f_aligned16.h> -#include <volk/volk_32fc_magnitude_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h deleted file mode 100644 index a2881308c..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc deleted file mode 100644 index e1f7eab3d..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ /dev/null @@ -1,86 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_multiply_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 -void qa_32fc_multiply_aligned16::t1() 
{ - - const int vlen = 2046; - const int ITERS = 100000; - - int i; - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_multiply_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h deleted file mode 100644 index c8abaa8fe..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc deleted file mode 100644 index 1444c78a9..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_power_spectrum_32f_aligned16.h> -#include <volk/volk_32fc_power_spectrum_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse3 - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - const float scalar = vlen; - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - - printf("32fc_power_spectrum_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse33... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); - } -} - -#endif diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h deleted file mode 100644 index d991223f3..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc deleted file mode 100644 index d9ead8495..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_square_dist_aligned16.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> - -#define ERR_DELTA (1e-4) -#define NUM_ITERS 10000000 -#define VEC_LEN 64 -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex<float>* src0 ; - std::complex<float>* points; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - - printf("32fc_square_dist_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(; i < vlen; ++i) { - //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h deleted file mode 100644 index 9d365d8b0..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H -#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE 
(qa_32fc_square_dist_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc deleted file mode 100644 index f923d1d5c..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_square_dist_scalar_mult_aligned16.h> -#include <stdio.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA .0001 -#define NUM_ITERS 10000000 -#define VEC_LEN 64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex<float>* src0 ; - std::complex<float>* points; - float scalar; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - random_floats(&scalar, 1); - - printf("32fc_square_dist_scalar_mult_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(i = 0; i < vlen; ++i) { - printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h deleted file mode 100644 index ac4e3c45b..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H -#define 
INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc deleted file mode 100644 index 661801709..000000000 --- a/volk/lib/qa_32s_and_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_and_aligned16.h> -#include <volk/volk_32s_and_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_and_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32s_and_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_and_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 
1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h deleted file mode 100644 index dfcb47c63..000000000 --- a/volk/lib/qa_32s_and_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H -#define INCLUDED_QA_32S_AND_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_and_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_and_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc deleted file mode 100644 index 07d799809..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_convert_32f_aligned16.h> -#include <volk/volk_32s_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h deleted file mode 100644 index efd2a2eea..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc deleted file mode 100644 index 2ec610ffb..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_convert_32f_unaligned16.h> -#include <volk/volk_32s_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h deleted file mode 100644 index 5006f5fd8..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc deleted file mode 100644 index 9da2ae344..000000000 --- a/volk/lib/qa_32s_or_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_or_aligned16.h> -#include <volk/volk_32s_or_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_or_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32s_or_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_or_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h deleted file mode 100644 index 9e949eb52..000000000 --- a/volk/lib/qa_32s_or_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H -#define INCLUDED_QA_32S_OR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_or_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_or_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */ diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc deleted file mode 100644 index 313c786b6..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include <volk/volk.h> -#include <qa_32u_byteswap_aligned16.h> -#include <volk/volk_32u_byteswap_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint32_t output0[vlen] __attribute__ ((aligned (16))); - uint32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint32_t)); - printf("32u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h deleted file mode 100644 index 47bad4c3d..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc deleted file mode 100644 index 7f9c4584a..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_convert_32f_aligned16.h> -#include <volk/volk_64f_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h deleted file mode 100644 index 95d79f73d..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc deleted file mode 100644 index 98aadbf4d..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_convert_32f_unaligned16.h> -#include <volk/volk_64f_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h deleted file mode 100644 index 430327e81..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc deleted file mode 100644 index 76e755514..000000000 --- a/volk/lib/qa_64f_max_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_max_aligned16.h> -#include <volk/volk_64f_max_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_max_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h deleted file mode 100644 index 7cbd4d4c1..000000000 --- a/volk/lib/qa_64f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H -#define INCLUDED_QA_64F_MAX_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc deleted file mode 100644 index 4b70d2881..000000000 --- a/volk/lib/qa_64f_min_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_min_aligned16.h> -#include <volk/volk_64f_min_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_min_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h deleted file mode 100644 index a0e95395f..000000000 --- a/volk/lib/qa_64f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H -#define INCLUDED_QA_64F_MIN_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc deleted file mode 100644 index 20d012c9e..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include <volk/volk.h> -#include <qa_64u_byteswap_aligned16.h> -#include <volk/volk_64u_byteswap_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint64_t output0[vlen] __attribute__ ((aligned (16))); - uint64_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint64_t)); - printf("64u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h deleted file mode 100644 index a4fa0c983..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc deleted file mode 100644 index 8dd5f76ca..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_16s_aligned16.h> -#include <volk/volk_8s_convert_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h deleted file mode 100644 index 38739fc96..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc deleted file mode 100644 index 12c502d4b..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_16s_unaligned16.h> -#include <volk/volk_8s_convert_16s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_unaligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h deleted file mode 100644 index d39fffc35..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc deleted file mode 100644 index 672f5662f..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_32f_aligned16.h> -#include <volk/volk_8s_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h deleted file mode 100644 index 7f8401d42..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc deleted file mode 100644 index 43468b1b1..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_32f_unaligned16.h> -#include <volk/volk_8s_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_unaligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen+1] __attribute__ ((aligned (16))); - - float output_generic[vlen+1] __attribute__ ((aligned (16))); - float output_sse4_1[vlen+1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%e...%e\n", output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h deleted file mode 100644 index aad2f8c22..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index 94e63e37d..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_deinterleave_16s_aligned16.h> -#include <volk/volk_8sc_deinterleave_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_16s_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_11[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse4_11[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 9c99fed70..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 29073eed7..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,135 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_deinterleave_32f_aligned16.h> -#include <volk/volk_8sc_deinterleave_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - float output_sse14_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max<double>((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max<double>((output_generic[i])*1e-4, 1e-4)); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4)); - } -} - - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h deleted file mode 100644 index 63b5fdadb..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 4980c982a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include 
<qa_8sc_deinterleave_real_16s_aligned16.h> -#include <volk/volk_8sc_deinterleave_real_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - printf("sse4_1 not available... no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 02050926f..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 3c3f737a1..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,139 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_deinterleave_real_32f_aligned16.h> -#include <volk/volk_8sc_deinterleave_real_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> *input0; - - float* output_generic; - float* output_sse; - float* output_sse4_1; - - ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t)); - ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float)); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0); - } - - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, 
%d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } - - free(input0); - free(output_generic); - free(output_sse); - free(output_sse4_1); -} - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 93338e488..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index a33d1bf30..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_8sc_deinterleave_real_8s_aligned16.h> -#include <volk/volk_8sc_deinterleave_real_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 92fc0dd4a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc deleted file mode 100644 index 216bf1cef..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_multiply_conjugate_16sc_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <ctime> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<int8_t>* input; - std::complex<int8_t>* taps; - - std::complex<int16_t>* result_generic; - std::complex<int16_t>* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t)); - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_16sc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex<int16_t>*)result_generic, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex<int16_t>*)result_sse4_1, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), 
std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i])); - - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h deleted file mode 100644 index 0e78a5eca..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc deleted file mode 100644 index 4c707446e..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_multiply_conjugate_32fc_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <ctime> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<int8_t>* input; - std::complex<int8_t>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float)); - - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), 
std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i])); - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h deleted file mode 100644 index eb9ae309c..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc new file mode 100644 index 000000000..e85e2c1bc --- /dev/null +++ b/volk/lib/qa_utils.cc @@ -0,0 +1,447 @@ +#include "qa_utils.h" +#include <stdlib.h> +#include <boost/foreach.hpp> +#include <boost/assign/list_of.hpp> +#include <boost/tokenizer.hpp> +//#include <boost/test/unit_test.hpp> +#include <iostream> +#include <vector> +#include <time.h> +#include <math.h> +#include <boost/lexical_cast.hpp> +//#include <volk/volk_runtime.h> +#include <volk/volk_registry.h> +#include <volk/volk.h> +#include <boost/typeof/typeof.hpp> +#include <boost/type_traits.hpp> + +float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +template <class t> +void random_floats (t *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +void load_random_data(void *data, volk_type_t type, unsigned int n) { + if(type.is_complex) 
n *= 2; + if(type.is_float) { + if(type.size == 8) random_floats<double>((double *)data, n); + else random_floats<float>((float *)data, n); + } else { + float int_max = float(uint64_t(2) << (type.size*8)); + if(type.is_signed) int_max /= 2.0; + for(int i=0; i<n; i++) { + float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max; + //man i really don't know how to do this in a more clever way, you have to cast down at some point + switch(type.size) { + case 8: + if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; + else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; + break; + case 4: + if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; + else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; + break; + case 2: + if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; + else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; + break; + case 1: + if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; + else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; + break; + default: + throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here + } + } + } +} + +void *make_aligned_buffer(unsigned int len, unsigned int size) { + void *buf; + int ret; + ret = posix_memalign((void**)&buf, 16, len * size); + assert(ret == 0); + memset(buf, 0x00, len*size); + return buf; +} + +void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) { + BOOST_FOREACH(volk_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 
2 : 1))); + } +} + +static std::vector<std::string> get_arch_list(const int archs[]) { + std::vector<std::string> archlist; + int num_archs = archs[0]; + + //there has got to be a way to query these arches + for(int i = 0; i < num_archs; i++) { + switch(archs[i+1]) { + case (1<<LV_GENERIC): + archlist.push_back("generic"); + break; + case (1<<LV_ORC): + archlist.push_back("orc"); + break; + case (1<<LV_SSE): + archlist.push_back("sse"); + break; + case (1<<LV_SSE2): + archlist.push_back("sse2"); + break; + case (1<<LV_SSE3): + archlist.push_back("sse3"); + break; + case (1<<LV_SSSE3): + archlist.push_back("ssse3"); + break; + case (1<<LV_SSE4_1): + archlist.push_back("sse4_1"); + break; + case (1<<LV_SSE4_2): + archlist.push_back("sse4_2"); + break; + case (1<<LV_SSE4_A): + archlist.push_back("sse4_a"); + break; + case (1<<LV_MMX): + archlist.push_back("mmx"); + break; + case (1<<LV_AVX): + archlist.push_back("avx"); + break; + default: + break; + } + } + return archlist; +} + +volk_type_t volk_type_from_string(std::string name) { + volk_type_t type; + type.is_float = false; + type.is_scalar = false; + type.is_complex = false; + type.is_signed = false; + type.size = 0; + type.str = name; + + if(name.size() < 2) throw std::string("name too short to be a datatype"); + + //is it a scalar? 
+ if(name[0] == 's') { + type.is_scalar = true; + name = name.substr(1, name.size()-1); + } + + //get the data size + int last_size_pos = name.find_last_of("0123456789"); + if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); + //will throw if malformed + int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1)); + + assert(((size % 8) == 0) && (size <= 64) && (size != 0)); + type.size = size/8; //in bytes + + for(int i=last_size_pos+1; i < name.size(); i++) { + switch (name[i]) { + case 'f': + type.is_float = true; + break; + case 'i': + type.is_signed = true; + break; + case 'c': + type.is_complex = true; + break; + case 'u': + type.is_signed = false; + break; + default: + throw; + } + } + + return type; +} + +static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, + std::vector<volk_type_t> &outputsig, + std::string name) { + boost::char_separator<char> sep("_"); + boost::tokenizer<boost::char_separator<char> > tok(name, sep); + std::vector<std::string> toked; + tok.assign(name); + toked.assign(tok.begin(), tok.end()); + + assert(toked[0] == "volk"); + toked.erase(toked.begin()); + + //ok. we're assuming a string in the form + //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) + + enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; + std::string fn_name; + volk_type_t type; + BOOST_FOREACH(std::string token, toked) { + try { + type = volk_type_from_string(token); + if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... 
+ + if(side == SIDE_INPUT) inputsig.push_back(type); + else outputsig.push_back(type); + } catch (...){ + if(token[0] == 'x') { //it's a multiplier + if(side == SIDE_INPUT) assert(inputsig.size() > 0); + else assert(outputsig.size() > 0); + int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid + for(int i=1; i<multiplier; i++) { + if(side == SIDE_INPUT) inputsig.push_back(inputsig.back()); + else outputsig.push_back(outputsig.back()); + } + } + else if(side == SIDE_INPUT) { //it's the function name, at least it better be + side = SIDE_NAME; + fn_name.append("_"); + fn_name.append(token); + } + else if(side == SIDE_OUTPUT) { + if(token != toked.back()) throw; //the last token in the name is the alignment + } + } + } + //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input! + assert(inputsig.size() != 0); +} + +inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], vlen, arch.c_str()); +} + +inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str()); +} + +inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); +} + +inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); +} + +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); +} + +inline void 
run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + +template <class t> +bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; + for(int i=0; i<vlen; i++) { + if(((t *)(in1))[i] < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision + if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; + } + } + } + + return fail; +} + +template <class t> +bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { + bool fail = false; + int print_max_errs = 10; + for(int i=0; i<vlen; i++) { + if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl; + } + } + } + + return fail; +} + +bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { + std::cout << "RUN_VOLK_TESTS: " << name << std::endl; + + //first let's get a list of available architectures for the test + std::vector<std::string> arch_list = get_arch_list(archs); + + if(arch_list.size() < 2) { + std::cout << "no architectures to test" << std::endl; + return false; + } + + //now we have to get a function signature by parsing the name + std::vector<volk_type_t> inputsig, outputsig; + 
get_signatures_from_name(inputsig, outputsig, name); + + //pull the input scalars into their own vector + std::vector<volk_type_t> inputsc; + for(int i=0; i<inputsig.size(); i++) { + if(inputsig[i].is_scalar) { + inputsc.push_back(inputsig[i]); + inputsig.erase(inputsig.begin() + i); + } + } + + //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl; + //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl; + std::vector<void *> inbuffs; + std::vector<void *> free_buffs; //this is just a list of void*'s that i'll have to free later. + //we need it because we dupe void*s in test_data below. + make_buffer_for_signature(inbuffs, inputsig, vlen); + for(int i=0; i<inbuffs.size(); i++) { + load_random_data(inbuffs[i], inputsig[i], vlen); + free_buffs.push_back(inbuffs[i]); + } + + //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch + std::vector<std::vector<void *> > test_data; + for(int i=0; i<arch_list.size(); i++) { + std::vector<void *> arch_buffs; + for(int j=0; j<outputsig.size(); j++) { + arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 
2 : 1))); + free_buffs.push_back(arch_buffs.back()); + } + for(int j=0; j<inputsig.size(); j++) { + arch_buffs.push_back(inbuffs[j]); + } + test_data.push_back(arch_buffs); + } + + std::vector<volk_type_t> both_sigs; + both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); + both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); + + //now run the test + clock_t start, end; + for(int i = 0; i < arch_list.size(); i++) { + start = clock(); + + switch(both_sigs.size()) { + case 1: + if(inputsc.size() == 0) { + run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else throw "unsupported 1 arg function >1 scalars"; + break; + case 2: + if(inputsc.size() == 0) { + run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else throw "unsupported 2 arg function >1 scalars"; + break; + case 3: + if(inputsc.size() == 0) { + run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else throw "unsupported 3 arg function >1 scalars"; + break; + case 4: + run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + break; + default: + throw "no function handler for this signature"; + break; + } + + end = clock(); + std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + } + //and now compare each output to the generic output + //first we have to know which output is the generic one, they 
aren't in order... + int generic_offset=0; + for(int i=0; i<arch_list.size(); i++) + if(arch_list[i] == "generic") generic_offset=i; + + //now compare + //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know + + bool fail = false; + bool fail_global = false; + for(int i=0; i<arch_list.size(); i++) { + if(i != generic_offset) { + for(int j=0; j<both_sigs.size(); j++) { + if(both_sigs[j].is_float) { + if(both_sigs[j].size == 8) { + fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + } else { + //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ + switch(both_sigs[j].size) { + case 8: + if(both_sigs[j].is_signed) { + fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + break; + case 4: + if(both_sigs[j].is_signed) { + fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + break; + case 2: + if(both_sigs[j].is_signed) { + fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 
2 : 1), tol); + } + break; + case 1: + if(both_sigs[j].is_signed) { + fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + break; + default: + fail=1; + } + } + if(fail) { + fail_global = true; + std::cout << name << ": fail on arch " << arch_list[i] << std::endl; + } + //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1)); + } + } + } + + BOOST_FOREACH(void *buf, free_buffs) { + free(buf); + } + + return fail_global; +} + + diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h new file mode 100644 index 000000000..e2539060a --- /dev/null +++ b/volk/lib/qa_utils.h @@ -0,0 +1,33 @@ +#ifndef VOLK_QA_UTILS_H +#define VOLK_QA_UTILS_H + +#include <stdlib.h> +#include <string> + +struct volk_type_t { + bool is_float; + bool is_scalar; + bool is_signed; + bool is_complex; + int size; + std::string str; +}; + +volk_type_t volk_type_from_string(std::string); + +float uniform(void); +void random_floats(float *buf, unsigned n); + +bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); + +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) + +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place +typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); +typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input +typedef void (*volk_fn_2arg_s32f)(void *, 
void *, float, unsigned int, const char*); +typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*); + +#endif //VOLK_QA_UTILS_H diff --git a/volk/lib/qa_volk.h b/volk/lib/qa_volk.h deleted file mode 100644 index 43fa7faba..000000000 --- a/volk/lib/qa_volk.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU Example Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Example Public License for more details. - * - * You should have received a copy of the GNU Example Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#ifndef INCLUDED_QA_VOLK_H -#define INCLUDED_QA_VOLK_H - -#include <cppunit/TestSuite.h> - -//! collect all the tests for the example directory - -class qa_volk { - public: - //! return suite of tests for all of example directory - static CppUnit::TestSuite *suite (); -}; - -#endif /* INCLUDED_QA_VOLK_H */ diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc deleted file mode 100644 index 50ac08eab..000000000 --- a/volk/lib/test_all.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2002,2008 Free Software Foundation, Inc. 
- * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#include <cppunit/ui/text/TestRunner.h> -#include <cppunit/TextTestRunner.h> - -#include <qa_volk.h> - -#include <cppunit/XmlOutputter.h> -#include <iostream> -#include <getopt.h> -#include <stdlib.h> -#include <stdio.h> -#include <string> -#include <fstream> - -int -main (int argc, char **argv) -{ - - int opt = 0; - std::string xmlOutputFile(""); - - while( (opt = getopt(argc, argv, "o:")) != -1){ - switch(opt){ - case 'o': - if(optarg){ - xmlOutputFile.assign(optarg); - } - else{ - std::cerr << "No xml file output specified for -o" << std::endl; - exit(EXIT_FAILURE); - } - break; - - default: /* '?' */ - fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n", - argv[0]); - exit(EXIT_FAILURE); - } - - } - - CppUnit::TextUi::TestRunner runner; - - runner.addTest (qa_volk::suite ()); - - bool was_successful = false; - if(!xmlOutputFile.empty()){ - std::ofstream xmlOutput(xmlOutputFile.c_str()); - if(xmlOutput.is_open()){ - runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput)); - - was_successful = runner.run("", false, true, false); - } - xmlOutput.close(); - } - else{ - was_successful = runner.run ("", false); - } - - return was_successful ? 
0 : 1; -} diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc new file mode 100644 index 000000000..f33670856 --- /dev/null +++ b/volk/lib/testqa.cc @@ -0,0 +1,99 @@ +#include "qa_utils.h" +#include <volk/volk.h> +#include <volk/volk_registry.h> +#include <boost/test/unit_test.hpp> + +BOOST_AUTO_TEST_CASE(volk_test_all) { + //in order... +// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); + VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); +// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); +// 
VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + +} |