From bef3db60e73953f2d2ecdc6a86a81e11df3b103d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Mon, 13 Dec 2010 19:18:45 -0800 Subject: volk: committed some stuff i neglected --- volk/lib/Makefile.am | 17 +++++++++++------ volk/lib/qa_32f_sqrt_aligned16.cc | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 814d438fd..1291b01cd 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -45,7 +45,9 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ # list of programs run by "make check" and "make distcheck" #TESTS = test_all - +#orc stuff gets built in the ORC directory conditional to ORC being enabled. +#it gets linked in during the build of libvolk as an added library. +#there might be a better way to do this. lib_LTLIBRARIES = \ libvolk.la \ @@ -72,6 +74,9 @@ universal_CODE = \ generic_CODE = \ volk_cpu_generic.c + +orc_CODE = \ + volk_cpu_orc.c x86_CODE = \ volk_cpu_x86.c @@ -133,10 +138,9 @@ endif -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 - -libvolk_la_LIBADD = +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 +libvolk_la_LIBADD = ../orc/libvolk_orc.a @@ -233,11 +237,12 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ + ../orc/libvolk_orc.a \ $(CPPUNIT_LIBS) # ---------------------------------------------------------------- diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc index 9a5f71de0..81d66dad7 100644 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ b/volk/lib/qa_32f_sqrt_aligned16.cc @@ -52,6 +52,14 
@@ void qa_32f_sqrt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + /* for(int i = 0; i < 10; ++i) { printf("inputs: %f\n", input0[i]); @@ -92,6 +100,13 @@ void qa_32f_sqrt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); } -- cgit From 611526f9dfba0df4a1a49d47916706438ac194b3 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 01:00:29 -0800 Subject: Volk: Automated more automake for orc. Brought orcc generation in. Shared library libvolk_orc.la. Linking is hackery right now with specified -lorc-0.4 flags; this should change. Otherwise pretty much OK. 
--- volk/lib/Makefile.am | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 1291b01cd..649d461e0 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -138,9 +138,9 @@ endif -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 -libvolk_la_LIBADD = ../orc/libvolk_orc.a +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_la_LIBADD = ../orc/libvolk_orc.la @@ -237,12 +237,12 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ - ../orc/libvolk_orc.a \ + ../orc/libvolk_orc.la \ $(CPPUNIT_LIBS) # ---------------------------------------------------------------- -- cgit From 05f4bced29987a0a573d1fc5b214f3fa01dc84bd Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 13:36:55 -0800 Subject: Volk: More autotools stuff for Orc. Should build OK with or without Orc now. 
--- volk/lib/Makefile.am | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 649d461e0..385401ae1 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -138,10 +138,13 @@ endif -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +if HAVE_ORC libvolk_la_LIBADD = ../orc/libvolk_orc.la - +libvolk_la_LDFLAGS += -lorc-0.4 +libvolk_runtime_la_LDFLAGS += -lorc-0.4 +endif # ---------------------------------------------------------------- @@ -237,13 +240,18 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ - ../orc/libvolk_orc.la \ $(CPPUNIT_LIBS) + +if HAVE_ORC +libvolk_qa_la_LIBADD += \ + ../orc/libvolk_orc.la + libvolk_qa_la_LDFLAGS += -lorc-0.4 +endif # ---------------------------------------------------------------- # headers that don't get installed -- cgit From d8031649fa3186d7e6b000dcfaa349deacf51262 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 16:41:14 -0800 Subject: Volk: patch via Nick M. 
--- volk/lib/Makefile.am | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 385401ae1..d38004f2a 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -74,9 +74,6 @@ universal_CODE = \ generic_CODE = \ volk_cpu_generic.c - -orc_CODE = \ - volk_cpu_orc.c x86_CODE = \ volk_cpu_x86.c @@ -356,7 +353,7 @@ noinst_PROGRAMS = \ test_all test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk_qa.la +test_all_LDADD = libvolk_qa.la ../orc/libvolk_orc.la distclean-local: -- cgit From 2e9a7d350713b4e1b21458db8f3fce8a557858ae Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 17:13:40 -0800 Subject: Volk: Added QA tests for all the Orc stuff. Added a 16u_byteswap but it's broken right now. --- volk/lib/qa_16u_byteswap_aligned16.cc | 9 +++++++++ volk/lib/qa_32f_add_aligned16.cc | 9 +++++++++ volk/lib/qa_32s_and_aligned16.cc | 9 +++++++++ volk/lib/qa_8s_convert_32f_aligned16.cc | 8 ++++++++ 4 files changed, 35 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc index 6b19828a4..c30b6ba41 100644 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -24,6 +24,7 @@ void qa_16u_byteswap_aligned16::t1() { uint16_t output0[vlen] __attribute__ ((aligned (16))); uint16_t output01[vlen] __attribute__ ((aligned (16))); + uint16_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_16u_byteswap_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16u_byteswap_aligned16_manual(output02, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = 
clock(); for(int count = 0; count < ITERS; ++count) { volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); } @@ -54,6 +62,7 @@ void qa_16u_byteswap_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc index 002aebfc9..d9214e8a2 100644 --- a/volk/lib/qa_32f_add_aligned16.cc +++ b/volk/lib/qa_32f_add_aligned16.cc @@ -78,6 +78,7 @@ void qa_32f_add_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -93,6 +94,13 @@ void qa_32f_add_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -107,6 +115,7 @@ void qa_32f_add_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc index 72d05cf6f..5720ee869 100644 --- a/volk/lib/qa_32s_and_aligned16.cc +++ b/volk/lib/qa_32s_and_aligned16.cc @@ -25,6 +25,7 @@ void qa_32s_and_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t 
output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -40,6 +41,13 @@ void qa_32s_and_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32s_and_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc index 522da0b9d..3b3aa6919 100644 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ b/volk/lib/qa_8s_convert_32f_aligned16.cc @@ -40,6 +40,14 @@ void qa_8s_convert_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { -- cgit From 87a9b14e0b0e2c2d0dcd75d42f2a15211265f102 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 17:44:34 -0800 Subject: Volk: added references to libs instead of specifying them directly --- volk/lib/Makefile.am | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 
d38004f2a..faab4a010 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -133,14 +133,21 @@ libvolk_runtime_la_SOURCES = \ $(universal_runtime_CODE) endif +volk_orc_LDFLAGS = \ + $(ORC_LDFLAGS) \ + -lorc-0.4 + +volk_orc_LIBADD = \ + ../orc/libvolk_orc.la - +if HAVE_ORC +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_la_LIBADD = $(volk_orc_LIBADD) +else libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -if HAVE_ORC -libvolk_la_LIBADD = ../orc/libvolk_orc.la -libvolk_la_LDFLAGS += -lorc-0.4 -libvolk_runtime_la_LDFLAGS += -lorc-0.4 +libvolk_la_LIBADD = endif @@ -243,12 +250,6 @@ libvolk_qa_la_LIBADD = \ libvolk.la \ libvolk_runtime.la \ $(CPPUNIT_LIBS) - -if HAVE_ORC -libvolk_qa_la_LIBADD += \ - ../orc/libvolk_orc.la - libvolk_qa_la_LDFLAGS += -lorc-0.4 -endif # ---------------------------------------------------------------- # headers that don't get installed @@ -353,7 +354,7 @@ noinst_PROGRAMS = \ test_all test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk_qa.la ../orc/libvolk_orc.la +test_all_LDADD = libvolk_qa.la distclean-local: -- cgit From 21426265324c883c91eeaaf75a81f2ccdc6e249d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 21:12:49 -0800 Subject: Volk: Build fixes to work with/without Orc. 
--- volk/lib/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index faab4a010..253033461 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -140,7 +140,7 @@ volk_orc_LDFLAGS = \ volk_orc_LIBADD = \ ../orc/libvolk_orc.la -if HAVE_ORC +if LV_HAVE_ORC libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) libvolk_la_LIBADD = $(volk_orc_LIBADD) -- cgit From f9ee6a55cb397f9302769a25a8c959fa162354f0 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 14 Dec 2010 22:58:33 -0800 Subject: Volk: Some new basic Orc implementations with QA code --- volk/lib/qa_16u_byteswap_aligned16.cc | 1 + volk/lib/qa_32f_divide_aligned16.cc | 10 ++++++++++ volk/lib/qa_32f_multiply_aligned16.cc | 9 +++++++++ volk/lib/qa_32f_subtract_aligned16.cc | 9 +++++++++ 4 files changed, 29 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc index c30b6ba41..b740f91df 100644 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -30,6 +30,7 @@ void qa_16u_byteswap_aligned16::t1() { output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); } memcpy(output01, output0, vlen*sizeof(uint16_t)); + memcpy(output02, output0, vlen*sizeof(uint16_t)); printf("16u_byteswap_aligned\n"); diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index 8826bf94f..f104e0443 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -35,6 +35,7 @@ void qa_32f_divide_aligned16::t1() { float input1[vlen] __attribute__ ((aligned (16))); float output0[vlen] __attribute__ ((aligned (16))); + float output1[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { 
@@ -51,6 +52,14 @@ void qa_32f_divide_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 10; ++i) { @@ -61,6 +70,7 @@ void qa_32f_divide_aligned16::t1() { for(int i = 0; i < vlen; ++i) { CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); + CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]); } } diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc index e52748466..f9c034d70 100644 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32f_multiply_aligned16.cc @@ -78,6 +78,7 @@ void qa_32f_multiply_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -93,6 +94,13 @@ void qa_32f_multiply_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -107,6 +115,7 @@ void qa_32f_multiply_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git 
a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc index a7e1b5ae3..5a5a7c9b6 100644 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ b/volk/lib/qa_32f_subtract_aligned16.cc @@ -25,6 +25,7 @@ void qa_32f_subtract_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_32f_subtract_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32f_subtract_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } -- cgit From 15ad4b5398e474bfb52fdb7e826b69f3e398c0b0 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 15 Dec 2010 16:27:42 -0800 Subject: Volk: A bunch of new ORC routines plus tests. Also fixed a typo in the generic version of 16sc_magnitude_16s_a16. 
--- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 9 +++++++++ volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 20 ++++++++++++++++++++ volk/lib/qa_32f_divide_aligned16.cc | 9 +++++++++ volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 9 +++++++++ volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 9 +++++++++ volk/lib/qa_32s_or_aligned16.cc | 9 +++++++++ 6 files changed, 65 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index b14610757..c8f13ff84 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -23,6 +23,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { std::complex input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -40,6 +41,13 @@ void qa_16sc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); } @@ -64,6 +72,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index 2c9e48f6e..e7178863c 
100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -15,6 +15,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); int16_t* inputLoad = (int16_t*)input0; @@ -37,6 +38,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 100; ++i) { @@ -48,6 +57,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4); } } @@ -63,6 +73,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -79,6 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + 
printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); @@ -104,6 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index f104e0443..b2c2ecf9a 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -88,6 +88,7 @@ void qa_32f_divide_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -103,6 +104,13 @@ void qa_32f_divide_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -117,6 +125,7 @@ void qa_32f_divide_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 
a4be1616b..c3e65866b 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -23,6 +23,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { std::complex input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -40,6 +41,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); } @@ -64,6 +72,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc index d69ada408..6a1d46c7a 100644 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc @@ -23,6 +23,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { std::complex input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -40,6 +41,13 @@ void 
qa_32fc_magnitude_32f_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); } @@ -64,6 +72,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc index e09dfb91c..9ea5283a6 100644 --- a/volk/lib/qa_32s_or_aligned16.cc +++ b/volk/lib/qa_32s_or_aligned16.cc @@ -25,6 +25,7 @@ void qa_32s_or_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -40,6 +41,13 @@ void qa_32s_or_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32s_or_aligned16::t1() { for(int i = 0; i < 
vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } -- cgit From c6fff77de9b686761f93f0e1de237f8543f5e919 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 17 Dec 2010 11:14:41 -0800 Subject: Volk: A bunch of new Orc routines plus a couple of build changes. 32fc_magnitude_16s fails test_all right now. --- volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 12 ++++++++++++ volk/lib/qa_16sc_deinterleave_32f_aligned16.cc | 11 +++++++++++ volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc | 9 +++++++++ volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 5 +++-- volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 6 +++--- volk/lib/qa_32f_max_aligned16.cc | 9 +++++++++ volk/lib/qa_32f_min_aligned16.cc | 9 +++++++++ volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 ++++---- volk/lib/qa_volk.cc | 1 - 9 files changed, 60 insertions(+), 10 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc index e700ac72c..7e9e31df5 100644 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc @@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { int16_t output_generic1[vlen] __attribute__ ((aligned (16))); int16_t output_sse2[vlen] __attribute__ ((aligned (16))); int16_t output_sse21[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); + int16_t output_orc1[vlen] __attribute__ ((aligned (16))); int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); @@ -43,6 +45,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, 
input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); } @@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); + + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]); } } diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc index 6ee076998..45100206d 100644 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc @@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { float output_generic1[vlen] __attribute__ ((aligned (16))); float output_sse2[vlen] __attribute__ ((aligned (16))); float output_sse21[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); + float output_orc1[vlen] __attribute__ ((aligned (16))); int16_t* loadInput = (int16_t*)input0; for(int i = 0; i < vlen*2; ++i) { @@ -41,6 +43,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); } @@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); 
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4); } } diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc index 5ab458bc9..d187d20c3 100644 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc @@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { int8_t output_generic[vlen] __attribute__ ((aligned (16))); int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); + int8_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t* loadInput = (int16_t*)input0; for(int i = 0; i < vlen*2; ++i) { @@ -39,6 +40,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); } @@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); } } diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index c8f13ff84..dd4ae75ff 100644 --- 
a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); - start = clock(); +/* start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); +*/ start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); @@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); + //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index e7178863c..53d42e28c 100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); - start = clock(); +/* start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); - +*/ start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); @@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], 
output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); +// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc index 3ef375176..cb1fd3627 100644 --- a/volk/lib/qa_32f_max_aligned16.cc +++ b/volk/lib/qa_32f_max_aligned16.cc @@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_32f_max_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc index 617e18b24..bf453f360 100644 --- a/volk/lib/qa_32f_min_aligned16.cc +++ b/volk/lib/qa_32f_min_aligned16.cc @@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] 
__attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); @@ -40,6 +41,13 @@ void qa_32f_min_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); } @@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index c3e65866b..105d32d0c 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } + //for(int i = 0; i < 10; ++i) { + // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + // printf("generic... %i, sse3... %i, orc... 
%i\n", output_generic[i], output_sse3[i], output_orc[i]); + //} for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index c3c27b69b..f6a334da7 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -118,7 +118,6 @@ CppUnit::TestSuite * qa_volk::suite() { CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); - s->addTest(qa_16s_quad_max_star_aligned16::suite()); s->addTest(qa_32fc_dot_prod_aligned16::suite()); s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); -- cgit From 200720da362e30f74083aad4dc106e4a057638bf Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 17 Dec 2010 12:20:16 -0800 Subject: Volk: Magnitude functions. 32fc_magnitude_16s currently clips to +MAX instead of -MAX. --- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 6 +++--- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index dd4ae75ff..d00315b57 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -40,14 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); -/* start = clock(); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); -*/ + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); @@ -73,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); 
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 105d32d0c..53b3bf790 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - //for(int i = 0; i < 10; ++i) { - // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); - //} + for(int i = 0; i < 10; ++i) { + printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); + } for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); -- cgit From 0e92b93f21fc9c324c379bc318120d414e7422cc Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 17 Dec 2010 13:35:40 -0800 Subject: Volk: Orc impl for 32fc_magnitude_16s saturates at -max instead of +max. --- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 6 +++--- volk/lib/qa_volk.cc | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 53b3bf790..93d4ec150 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,9 +63,9 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - printf("generic... %i, sse3... 
%i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); + for(int i = 0; i < 1; ++i) { + // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); } for(int i = 0; i < vlen; ++i) { diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index f6a334da7..c3c27b69b 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -118,6 +118,7 @@ CppUnit::TestSuite * qa_volk::suite() { CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); + s->addTest(qa_16s_quad_max_star_aligned16::suite()); s->addTest(qa_32fc_dot_prod_aligned16::suite()); s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); -- cgit From 5b45b875ed58fd66234764a05da42c6eaff22c4d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 11 Jan 2011 15:17:55 -0800 Subject: Volk: Added more Orc routines (including complex multiply). Started redoing the testing framework so it's easier to add new archs to tests. 
--- volk/lib/Makefile.am | 2 + volk/lib/qa_32f_normalize_aligned16.cc | 13 +++++ volk/lib/qa_32fc_32f_multiply_aligned16.cc | 84 +++++++++++++----------------- volk/lib/qa_32fc_multiply_aligned16.cc | 12 +++++ 4 files changed, 64 insertions(+), 47 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 253033461..0aeafe4aa 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -156,6 +156,7 @@ endif # ---------------------------------------------------------------- libvolk_qa_la_SOURCES = \ qa_volk.cc \ + qa_utils.cc \ qa_16s_quad_max_star_aligned16.cc \ qa_32fc_dot_prod_aligned16.cc \ qa_32fc_square_dist_aligned16.cc \ @@ -257,6 +258,7 @@ libvolk_qa_la_LIBADD = \ noinst_HEADERS = \ volk_init.h \ qa_volk.h \ + qa_utils.h \ assembly.h \ qa_16s_quad_max_star_aligned16.h \ qa_32fc_dot_prod_aligned16.h \ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc index 1c7b485a6..0da43ecff 100644 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ b/volk/lib/qa_32f_normalize_aligned16.cc @@ -26,13 +26,16 @@ void qa_32f_normalize_aligned16::t1() { float* output0; float* output01; + float* output02; ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); + ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float)); for(int i = 0; i < vlen; ++i) { output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); } memcpy(output01, output0, vlen*sizeof(float)); + memcpy(output02, output0, vlen*sizeof(float)); printf("32f_normalize_aligned\n"); start = clock(); @@ -49,6 +52,14 @@ void qa_32f_normalize_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc"); + } + end = clock(); + total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + for(int i = 0; i < 1; ++i) { //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); @@ -57,10 +68,12 @@ void qa_32f_normalize_aligned16::t1() { for(int i = 0; i < vlen; ++i) { // printf("%e...%e\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4); } free(output0); free(output01); + free(output02); } #endif diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc index 4eba0a3cd..7bb8d21c1 100644 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc @@ -2,28 +2,12 @@ #include #include #include -#include #include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); +#include +#include #define ERR_DELTA (1e-4) -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 void qa_32fc_32f_multiply_aligned16::t1() { const int vlen = 2046; @@ -36,50 +20,56 @@ void qa_32fc_32f_multiply_aligned16::t1() { std::complex* input; float * taps; int i; + std::vector archs; + archs.push_back("generic"); +#ifdef LV_HAVE_SSE3 + archs.push_back("sse3"); +#endif +#ifdef LV_HAVE_ORC + archs.push_back("orc"); +#endif - std::complex* result_generic; - std::complex* result_sse3; + std::vector* > results; ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); ret = posix_memalign((void**)&taps, 16, 
vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float)); + + for(i=0; i < archs.size(); i++) { + std::complex *ptr; + ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float)); + if(ret) { + printf("Couldn't allocate memory\n"); + exit(1); + } + results.push_back(ptr); + } random_floats((float*)input, vlen * 2); random_floats(taps, vlen); printf("32fc_32f_multiply_aligned16\n"); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); + for(i=0; i < archs.size(); i++) { + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str()); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("%s_time: %f\n", archs[i].c_str(), total); } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + for(i=0; i < vlen; i++) { + int j = 1; + for(j; j < archs.size(); j++) { + assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA); + } } free(input); free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_32f_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); + for(i=0; i < archs.size(); i++) { + free(results[i]); + } } -#endif /* LV_HAVE_SSE3 */ - diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc index e1f7eab3d..022b58ad6 100644 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_multiply_aligned16.cc @@ -41,11 +41,13 @@ void qa_32fc_multiply_aligned16::t1() { std::complex* result_generic; std::complex* result_sse3; + std::complex* result_orc; ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float)); random_floats((float*)input, vlen * 2); random_floats((float*)taps, vlen * 2); @@ -67,15 +69,25 @@ void qa_32fc_multiply_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); for(i = 0; i < vlen; i++){ assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA); } free(input); free(taps); free(result_generic); free(result_sse3); + free(result_orc); } #else -- cgit From c77bb3e71562daa68e9a195a0131b7cc04324784 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 12 Jan 2011 19:20:35 -0800 Subject: Volk: Working on a new QA architecture that doesn't require individual test programs. 
--- volk/lib/Makefile.am | 2 - volk/lib/qa_32fc_32f_multiply_aligned16.cc | 6 +- volk/lib/qa_8sc_deinterleave_16s_aligned16.cc | 2 +- volk/lib/qa_utils.cc | 223 ++++++++++++++++++++++++++ volk/lib/qa_utils.h | 19 +++ volk/lib/qa_volk.cc | 2 +- 6 files changed, 247 insertions(+), 7 deletions(-) create mode 100644 volk/lib/qa_utils.cc create mode 100644 volk/lib/qa_utils.h (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 0aeafe4aa..a10b0a362 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -156,7 +156,6 @@ endif # ---------------------------------------------------------------- libvolk_qa_la_SOURCES = \ qa_volk.cc \ - qa_utils.cc \ qa_16s_quad_max_star_aligned16.cc \ qa_32fc_dot_prod_aligned16.cc \ qa_32fc_square_dist_aligned16.cc \ @@ -181,7 +180,6 @@ libvolk_qa_la_SOURCES = \ qa_32f_dot_prod_aligned16.cc \ qa_32f_dot_prod_unaligned16.cc \ qa_32f_fm_detect_aligned16.cc \ - qa_32fc_32f_multiply_aligned16.cc \ qa_32fc_multiply_aligned16.cc \ qa_32f_divide_aligned16.cc \ qa_32f_multiply_aligned16.cc \ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc index 7bb8d21c1..b80e0e008 100644 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc @@ -5,10 +5,11 @@ #include #include #include +#include -#define ERR_DELTA (1e-4) +#define TOLERANCE (1e-4) -void qa_32fc_32f_multiply_aligned16::t1() { +void qa_32fc_32f_multiply_aligned16(void) { const int vlen = 2046; const int ITERS = 100000; @@ -72,4 +73,3 @@ void qa_32fc_32f_multiply_aligned16::t1() { free(results[i]); } } - diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc index 94e63e37d..f753e1107 100644 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc @@ -40,7 +40,7 @@ void qa_8sc_deinterleave_16s_aligned16::t1() { start = clock(); for(int count = 0; count < ITERS; 
++count) { - volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc new file mode 100644 index 000000000..4d93ca62a --- /dev/null +++ b/volk/lib/qa_utils.cc @@ -0,0 +1,223 @@ +#include "qa_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +#include +#include +#include +#include +//#include + +float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +template +t *make_aligned_buffer(unsigned int len) { + t *buf; + int ret; + ret = posix_memalign((void**)&buf, 16, len * sizeof(t)); + assert(ret == 0); + return buf; +} + +void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { + BOOST_FOREACH(std::string sig, inputsig) { + if (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else std::cout << "Invalid type!" 
<< std::endl; + } +} + +static std::vector get_arch_list(const int archs[]) { + std::vector archlist; + int num_archs = archs[0]; + + //there has got to be a way to query these arches + for(int i = 0; i < num_archs; i++) { + switch(archs[i+1]) { + case (1< valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u"); + + BOOST_FOREACH(std::string this_type, valid_types) { + if(type == this_type) return true; + } + return false; +} + + +static void get_function_signature(std::vector &inputsig, + std::vector &outputsig, + std::string name) { + boost::char_separator sep("_"); + boost::tokenizer > tok(name, sep); + std::vector toked; + tok.assign(name); + toked.assign(tok.begin(), tok.end()); + + assert(toked[0] == "volk"); + + inputsig.push_back(toked[1]); //mandatory + int pos = 2; + bool valid_type = true; + while(valid_type && pos < toked.size()) { + if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]); + else valid_type = false; + pos++; + } + while(!valid_type && pos < toked.size()) { + if(is_valid_type(toked[pos])) valid_type = true; + pos++; + } + while(valid_type && pos < toked.size()) { + if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); + else valid_type = false; + pos++; + } + + //if there's no explicit output sig then assume the output is the same as the first input + if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); + assert(inputsig.size() != 0); + assert(outputsig.size() != 0); +} + +inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str()); +} + +inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str()); +} + +inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector 
&inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); +} + +bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) { + std::cout << "RUN_VOLK_TESTS: " << name << std::endl; + + //first let's get a list of available architectures for the test + std::vector arch_list = get_arch_list(archs); + + BOOST_FOREACH(std::string arch, arch_list) { + std::cout << "Found an arch: " << arch << std::endl; + } + + //now we have to get a function signature by parsing the name + std::vector inputsig, outputsig; + get_function_signature(inputsig, outputsig, name); + + for(int i=0; i inbuffs; + make_buffer_for_signature(inbuffs, inputsig, vlen); + + //and set the input buffers to something random + //TODO + + //allocate output buffers -- one for each output for each arch + std::vector outbuffs; + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + + //now run the test + clock_t start, end; + for(int i = 0; i < arch_list.size(); i++) { + start = clock(); + switch(outputsig.size()+inputsig.size()) { + case 2: + run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + break; + case 3: + run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + break; + case 4: + run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + break; + default: + break; + } + end = clock(); + std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + } + + //and now compare each output to the generic output + //first we have to know which output is the generic one, they aren't in order... 
+ int generic_offset; + for(int i=0; i +#include +#include + +float uniform(void); +void random_floats(float *buf, unsigned n); + +bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); + +#define VOLK_RUN_TESTS(func, tol, len, iter) run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter) + +typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); +typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); + +#endif //VOLK_QA_UTILS_H diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc index c3c27b69b..8e7e59768 100644 --- a/volk/lib/qa_volk.cc +++ b/volk/lib/qa_volk.cc @@ -143,7 +143,7 @@ qa_volk::suite() s->addTest(qa_32f_dot_prod_aligned16::suite()); s->addTest(qa_32f_dot_prod_unaligned16::suite()); s->addTest(qa_32f_fm_detect_aligned16::suite()); - s->addTest(qa_32fc_32f_multiply_aligned16::suite()); + //s->addTest(qa_32fc_32f_multiply_aligned16::suite()); s->addTest(qa_32fc_multiply_aligned16::suite()); s->addTest(qa_32f_divide_aligned16::suite()); s->addTest(qa_32f_multiply_aligned16::suite()); -- cgit From 9a527257014878cac993ffe854bf8fdacc412be6 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 14 Jan 2011 13:07:06 -0800 Subject: Volk: QA code fixes, more Orc routines. Broke the 32fc_multiply Orc impl because I'm lame and lost some work. Fixed volk_8s_convert_16s Orc impl. Still need to rename functions and modify the QA sig parser to match. Then rewrite makefiles. 
--- volk/lib/qa_utils.cc | 94 ++++++++++++++++++++++++++++++++++++++++++---------- volk/lib/qa_utils.h | 2 +- 2 files changed, 77 insertions(+), 19 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 4d93ca62a..fa21db487 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -3,7 +3,7 @@ #include #include #include -#include +//#include #include #include #include @@ -13,19 +13,39 @@ #include #include #include -//#include float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) } -void -random_floats (float *buf, unsigned n) +void random_floats (float *buf, unsigned n) { for (unsigned i = 0; i < n; i++) buf[i] = uniform (); } +void load_random_data(void *data, std::string sig, unsigned int n) { + if(sig == "32fc") { + random_floats((float *)data, n*2); + } else if(sig == "32f") { + random_floats((float *)data, n); + } else if(sig == "32u") { + for(int i=0; i((RAND_MAX/2))) * 32768.0)); + } else if(sig == "16sc") { + for(int i=0; i((RAND_MAX/2))) * 32768.0)); + } else if(sig == "8u") { + for(int i=0; i((RAND_MAX/2)) * 256.0)); + } else if(sig == "8s") { + for(int i=0; i((RAND_MAX/2)) * 128.0)); + } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl; +} + template t *make_aligned_buffer(unsigned int len) { t *buf; @@ -37,11 +57,11 @@ t *make_aligned_buffer(unsigned int len) { void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { BOOST_FOREACH(std::string sig, inputsig) { - if (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else std::cout << "Invalid type!" 
<< std::endl; + if (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer(vlen)); + else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); + else std::cout << "Invalid type: " << sig << std::endl; } } @@ -90,7 +110,7 @@ static std::vector get_arch_list(const int archs[]) { } static bool is_valid_type(std::string type) { - std::vector valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u"); + std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u"); BOOST_FOREACH(std::string this_type, valid_types) { if(type == this_type) return true; @@ -120,16 +140,23 @@ static void get_function_signature(std::vector &inputsig, } while(!valid_type && pos < toked.size()) { if(is_valid_type(toked[pos])) valid_type = true; - pos++; + else pos++; } while(valid_type && pos < toked.size()) { if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); else valid_type = false; pos++; } - - //if there's no explicit output sig then assume the output is the same as the first input - if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); + + //if there's no output sig and only one input sig, assume there are 2 inputs + //this handles conversion fn's (which have a specified output sig) and most of the rest + if(outputsig.size() == 0 && inputsig.size() == 1) { + outputsig.push_back(inputsig[0]); + inputsig.push_back(inputsig[0]); + }//if there's no explicit output sig then assume the output is the same as the first input + else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); + + assert(inputsig.size() != 0); assert(outputsig.size() != 0); } @@ -168,7 +195,9 @@ bool 
run_volk_tests(const int archs[], void (*manual_func)(), std::string name, make_buffer_for_signature(inbuffs, inputsig, vlen); //and set the input buffers to something random - //TODO + for(int i=0; i outbuffs; @@ -204,9 +233,38 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(arch_list[i] == "generic") generic_offset=i; for(int i=0; i tol) { + std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; + return 1; + } + } + } else if(outputsig[0] == "32f") { + for(int j=0; j tol) { + std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; + return 1; + } + } + } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") { + for(int j=0; j get_arch_list(const int archs[]) { } static bool is_valid_type(std::string type) { - std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u"); + std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f") + ("32s")("32u")("16sc")("16s") + ("16u")("8s")("8sc")("8u") + ("s32f")("s16u")("s16s")("s8u") + ("s8s"); BOOST_FOREACH(std::string this_type, valid_types) { if(type == this_type) return true; @@ -148,17 +152,11 @@ static void get_function_signature(std::vector &inputsig, pos++; } - //if there's no output sig and only one input sig, assume there are 2 inputs - //this handles conversion fn's (which have a specified output sig) and most of the rest - if(outputsig.size() == 0 && inputsig.size() == 1) { - outputsig.push_back(inputsig[0]); - inputsig.push_back(inputsig[0]); - }//if there's no explicit output sig then assume the output is the same as the first input - else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]); - - assert(inputsig.size() != 0); - assert(outputsig.size() != 0); +} + +inline 
void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buff, vlen, arch.c_str()); } inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { @@ -190,26 +188,42 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, for(int i=0; i inbuffs; make_buffer_for_signature(inbuffs, inputsig, vlen); + //allocate output buffers -- one for each output for each arch + std::vector outbuffs; + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + //and set the input buffers to something random for(int i=0; i outbuffs; - BOOST_FOREACH(std::string arch, arch_list) { - make_buffer_for_signature(outbuffs, outputsig, vlen); + //so let's see here. if the operation has no output sig, it operates in place, + //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers. 
+ if(outputsig.size() == 0) { + //make a set of output buffers according to the input signature + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, inputsig, vlen); + } + //copy input buffer[0] to all the output buffers so it has something to operate on + //output buffer element size is the same as input buffer[0] + if( } + //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { start = clock(); switch(outputsig.size()+inputsig.size()) { + case 1: + run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); + break; case 2: run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); break; @@ -262,6 +276,13 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, return 1; } } + } else if(outputsig[0] == "8s" || outputsig[0] == "8u") { + for(int j=0; j #include -#include float uniform(void); void random_floats(float *buf, unsigned n); @@ -12,6 +11,7 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); -- cgit From be1b7d9ffb90aa9c750e6c6793f00dbc8bec486d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 19 Jan 2011 16:39:28 -0800 Subject: Volk: test suite supports scalar arguments and in-place operations --- volk/lib/qa_utils.cc | 357 +++++++++++++++++++++++++++++++-------------------- volk/lib/qa_utils.h | 15 ++- 2 files changed, 231 insertions(+), 141 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc 
b/volk/lib/qa_utils.cc index a8c00c143..e73b70985 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -7,7 +7,8 @@ #include #include #include -//#include +#include +#include //#include #include #include @@ -24,44 +25,53 @@ void random_floats (float *buf, unsigned n) buf[i] = uniform (); } -void load_random_data(void *data, std::string sig, unsigned int n) { - if(sig == "32fc") { - random_floats((float *)data, n*2); - } else if(sig == "32f") { +void load_random_data(void *data, volk_type_t type, unsigned int n) { + if(type.is_complex) n *= 2; + if(type.is_float) { + assert(type.size == 4); //TODO: double support random_floats((float *)data, n); - } else if(sig == "32u") { - for(int i=0; i((RAND_MAX/2))) * 32768.0)); - } else if(sig == "16sc") { - for(int i=0; i((RAND_MAX/2))) * 32768.0)); - } else if(sig == "8u") { - for(int i=0; i((RAND_MAX/2)) * 256.0)); - } else if(sig == "8s") { - for(int i=0; i((RAND_MAX/2)) * 128.0)); - } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl; + } else { + float int_max = pow(2, type.size*8); + if(type.is_signed) int_max /= 2.0; + for(int i=0; i((RAND_MAX/2))) * int_max; + //man i really don't know how to do this in a more clever way, you have to cast down at some point + switch(type.size) { + case 8: + if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; + else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; + break; + case 4: + if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; + else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; + break; + case 2: + if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; + else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; + break; + case 1: + if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; + else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; + break; + default: + throw; //no shenanigans here + } + } + } } -template -t *make_aligned_buffer(unsigned int len) { - t *buf; +void *make_aligned_buffer(unsigned 
int len, unsigned int size) { + void *buf; int ret; - ret = posix_memalign((void**)&buf, 16, len * sizeof(t)); + ret = posix_memalign((void**)&buf, 16, len * size); assert(ret == 0); return buf; } -void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { - BOOST_FOREACH(std::string sig, inputsig) { - if (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer(vlen)); - else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer(vlen)); - else std::cout << "Invalid type: " << sig << std::endl; +void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { + BOOST_FOREACH(volk_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1))); } } @@ -109,22 +119,56 @@ static std::vector get_arch_list(const int archs[]) { return archlist; } -static bool is_valid_type(std::string type) { - std::vector valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f") - ("32s")("32u")("16sc")("16s") - ("16u")("8s")("8sc")("8u") - ("s32f")("s16u")("s16s")("s8u") - ("s8s"); +volk_type_t volk_type_from_string(std::string name) { + volk_type_t type; + type.is_float = false; + type.is_scalar = false; + type.is_complex = false; + type.is_signed = false; + type.size = 0; + type.str = name; + + assert(name.size() > 1); - BOOST_FOREACH(std::string this_type, valid_types) { - if(type == this_type) return true; + //is it a scalar? 
+ if(name[0] == 's') { + type.is_scalar = true; + name = name.substr(1, name.size()-1); + } + + //get the data size + int last_size_pos = name.find_last_of("0123456789"); + if(last_size_pos < 0) throw 0; + //will throw if malformed + int size = boost::lexical_cast(name.substr(0, last_size_pos+1)); + + assert(((size % 8) == 0) && (size <= 64) && (size != 0)); + type.size = size/8; //in bytes + + for(int i=last_size_pos+1; i < name.size(); i++) { + switch (name[i]) { + case 'f': + type.is_float = true; + break; + case 'i': + type.is_signed = true; + break; + case 'c': + type.is_complex = true; + break; + case 'u': + type.is_signed = false; + break; + default: + throw; + } } - return false; -} + return type; +} -static void get_function_signature(std::vector &inputsig, - std::vector &outputsig, +static void get_signatures_from_name(std::vector &inputsig, + std::vector &outputsig, std::string name) { boost::char_separator sep("_"); boost::tokenizer > tok(name, sep); @@ -133,25 +177,38 @@ static void get_function_signature(std::vector &inputsig, toked.assign(tok.begin(), tok.end()); assert(toked[0] == "volk"); - - inputsig.push_back(toked[1]); //mandatory - int pos = 2; - bool valid_type = true; - while(valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]); - else valid_type = false; - pos++; - } - while(!valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) valid_type = true; - else pos++; - } - while(valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); - else valid_type = false; - pos++; + toked.erase(toked.begin()); + + //ok. 
we're assuming a string in the form + //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) + + enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT; + std::string fn_name; + volk_type_t type; + BOOST_FOREACH(std::string token, toked) { + try { + type = volk_type_from_string(token); + if(side == SIDE_INPUT) inputsig.push_back(type); + else outputsig.push_back(type); + } catch (...){ + if(token[0] == 'x') { //it's a multiplier + if(side == SIDE_INPUT) assert(inputsig.size() > 0); + else assert(outputsig.size() > 0); + int multiplier = boost::lexical_cast(token.substr(1, token.size()-1)); //will throw if invalid + for(int i=1; i while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); } +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buff, scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); +} + +template +bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + for(int i=0; i tol) return 1; + } + return 0; +} + +template +bool icompare(t *in1, t *in2, unsigned int vlen) { + for(int i=0; i arch_list = get_arch_list(archs); - BOOST_FOREACH(std::string arch, arch_list) { - std::cout << "Found an arch: " << arch << std::endl; - } - //now we have to get a function signature by parsing the name - std::vector inputsig, outputsig; - get_function_signature(inputsig, outputsig, name); - - for(int i=0; i inbuffs; - make_buffer_for_signature(inbuffs, inputsig, vlen); + std::vector inputsig, outputsig; + get_signatures_from_name(inputsig, outputsig, name); - //allocate output buffers -- one for each output for each arch - std::vector outbuffs; - BOOST_FOREACH(std::string arch, arch_list) { - 
make_buffer_for_signature(outbuffs, outputsig, vlen); - } - - //and set the input buffers to something random + std::vector inputsc, outputsc; for(int i=0; i inbuffs, outbuffs; - //so let's see here. if the operation has no output sig, it operates in place, - //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers. - if(outputsig.size() == 0) { - //make a set of output buffers according to the input signature - BOOST_FOREACH(std::string arch, arch_list) { + if(outputsig.size() == 0) { //we're operating in place... + //assert(inputsig.size() == 1); //we only support 0 output 1 input right now... + make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer + load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data + BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer make_buffer_for_signature(outbuffs, inputsig, vlen); + memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1)); + } + } else { + make_buffer_for_signature(inbuffs, inputsig, vlen); + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + + //and set the input buffers to something random + for(int i=0; i1 scalars"; break; case 2: - run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + if(inputsc.size() == 0) { + run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]); + } else throw "unsupported 2 arg function >1 scalars"; break; case 3: run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); @@ -234,69 +328,52 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string 
name, run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); break; default: + throw "no function handler for this signature"; break; } + end = clock(); std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; } - //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset; for(int i=0; i tol) { - std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "32f") { - for(int j=0; j tol) { - std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") { - for(int j=0; j #include +struct volk_type_t { + bool is_float; + bool is_scalar; + bool is_signed; + bool is_complex; + int size; + std::string str; +}; + +volk_type_t volk_type_from_string(std::string); + float uniform(void); void random_floats(float *buf, unsigned n); @@ -11,9 +22,11 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) -typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, 
one scalar float input +typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*); #endif //VOLK_QA_UTILS_H -- cgit From e3600f59e76c3dc08aedfd77629b7c5c48df86af Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 20 Jan 2011 16:30:09 -0800 Subject: volk: renamed all files. added all tests. some test things are still broken. --- volk/lib/qa_utils.cc | 101 +++++++++++++++++++++++++++++++-------------------- volk/lib/qa_utils.h | 1 + 2 files changed, 62 insertions(+), 40 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index e73b70985..4c151bd6f 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -19,7 +19,8 @@ float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) } -void random_floats (float *buf, unsigned n) +template +void random_floats (t *buf, unsigned n) { for (unsigned i = 0; i < n; i++) buf[i] = uniform (); @@ -28,8 +29,8 @@ void random_floats (float *buf, unsigned n) void load_random_data(void *data, volk_type_t type, unsigned int n) { if(type.is_complex) n *= 2; if(type.is_float) { - assert(type.size == 4); //TODO: double support - random_floats((float *)data, n); + if(type.size == 8) random_floats((double *)data, n); + else random_floats((float *)data, n); } else { float int_max = pow(2, type.size*8); if(type.is_signed) int_max /= 2.0; @@ -54,7 +55,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; break; default: - throw; //no shenanigans here + throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here } } } @@ -94,6 +95,9 @@ static std::vector get_arch_list(const int archs[]) { case (1< 1); + if(name.size() < 2) throw std::string("name too short to be a datatype"); //is it a scalar? 
if(name[0] == 's') { @@ -138,7 +142,7 @@ volk_type_t volk_type_from_string(std::string name) { //get the data size int last_size_pos = name.find_last_of("0123456789"); - if(last_size_pos < 0) throw 0; + if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); //will throw if malformed int size = boost::lexical_cast(name.substr(0, last_size_pos+1)); @@ -182,12 +186,14 @@ static void get_signatures_from_name(std::vector &inputsig, //ok. we're assuming a string in the form //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) - enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT; + enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; std::string fn_name; volk_type_t type; BOOST_FOREACH(std::string token, toked) { try { type = volk_type_from_string(token); + if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... + if(side == SIDE_INPUT) inputsig.push_back(type); else outputsig.push_back(type); } catch (...){ @@ -201,9 +207,11 @@ static void get_signatures_from_name(std::vector &inputsig, } } else if(side == SIDE_INPUT) { //it's the function name, at least it better be - side = SIDE_OUTPUT; - fn_name = token; - } else { + side = SIDE_NAME; + fn_name.append("_"); + fn_name.append(token); + } + else if(side == SIDE_OUTPUT) { if(token != toked.back()) throw; //the last token in the name is the alignment } } @@ -236,20 +244,40 @@ inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vect while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); } +inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str()); +} + template bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; for(int i=0; i tol) return 1; + if(fabs(((t 
*)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; + } + } } - return 0; + + return fail; } template -bool icompare(t *in1, t *in2, unsigned int vlen) { +bool icompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; for(int i=0; i 0) { + std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl; + } + } } - return 0; + + return fail; } bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) { @@ -300,7 +328,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, load_random_data(inbuffs[i], inputsig[i], vlen); } } - + //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { @@ -311,18 +339,22 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(inputsc.size() == 0) { run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]); + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: - 
run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + if(inputsc.size() == 0) { + run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); + } else throw "unsupported 3 arg function >1 scalars"; break; case 4: run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); @@ -337,29 +369,24 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, } //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... - int generic_offset; + int generic_offset=0; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { @@ -265,14 +266,14 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { } template -bool icompare(t *in1, t *in2, unsigned int vlen, float tol) { +bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { - std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl; + std::cout << "offset " << i << " in1: " << static_cast(t(((t *)(in1))[i])) << " in2: " << static_cast(t(((t *)(in2))[i])) << std::endl; } } } @@ -339,21 +340,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(inputsc.size() == 0) { run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]); + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function 
>1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]); + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: @@ -375,7 +376,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //now compare if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know - + //TODO: loop over the output signature as well bool fail = false; for(int i=0; i &inputsig, assert(inputsig.size() != 0); } -inline void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(buff, vlen, arch.c_str()); +inline void run_cast_test1(volk_fn_1arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], vlen, arch.c_str()); } -inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str()); +inline void run_cast_test2(volk_fn_2arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], vlen, 
arch.c_str()); } -inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str()); +inline void run_cast_test3(volk_fn_3arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); } -inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); +inline void run_cast_test4(volk_fn_4arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); } -inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(buff, scalar, vlen, arch.c_str()); +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } -inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); +inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } -inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { - while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str()); +inline void 
run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } template @@ -253,7 +254,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { @@ -291,74 +292,70 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, std::vector inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); - std::vector inputsc, outputsc; + //pull the input scalars into their own vector + std::vector inputsc; for(int i=0; i inbuffs, outbuffs; + std::vector inbuffs; + + make_buffer_for_signature(inbuffs, inputsig, vlen); + for(int i=0; i > test_data; + for(int i=0; i arch_buffs; + for(int j=0; j both_sigs; + both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); + both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { start = clock(); - switch(inputsig.size() + outputsig.size()) { + switch(both_sigs.size()) { case 1: if(inputsc.size() == 0) { - run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); + run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]); + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { - run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() 
== 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { - run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]); + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: - run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); break; default: throw "no function handler for this signature"; @@ -375,61 +372,63 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, if(arch_list[i] == "generic") generic_offset=i; //now compare - if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know - //TODO: loop over the output signature as well + //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know + bool fail = false; + bool fail_global = false; for(int i=0; i1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: 
if(inputsc.size() == 0) { run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { - run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]); + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 79fc8f006..e2539060a 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -18,9 +18,9 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); +bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); -#define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 4dd7f7599..9f4934dc0 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -7,93 +7,93 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { //in order... 
// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); // VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 2046, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000); + VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000); // VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); // VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); } -- cgit From 6091bad60cdfdf21624da452c7a8b74405345070 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 21 Jan 2011 15:41:30 -0800 Subject: Volk: removed all the old QA code that is covered by the test framework. --- volk/lib/Makefile.am | 1 - volk/lib/qa_16s_convert_32f_aligned16.cc | 74 ------- volk/lib/qa_16s_convert_32f_aligned16.h | 18 -- volk/lib/qa_16s_convert_32f_unaligned16.cc | 74 ------- volk/lib/qa_16s_convert_32f_unaligned16.h | 18 -- volk/lib/qa_16s_convert_8s_aligned16.cc | 61 ------ volk/lib/qa_16s_convert_8s_aligned16.h | 18 -- volk/lib/qa_16s_convert_8s_unaligned16.cc | 61 ------ volk/lib/qa_16s_convert_8s_unaligned16.h | 18 -- volk/lib/qa_16s_max_star_aligned16.cc | 65 ------- volk/lib/qa_16s_max_star_aligned16.h | 18 -- volk/lib/qa_16s_max_star_horizontal_aligned16.cc | 79 -------- volk/lib/qa_16s_max_star_horizontal_aligned16.h | 18 -- volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 89 --------- volk/lib/qa_16sc_deinterleave_16s_aligned16.h | 18 -- volk/lib/qa_16sc_deinterleave_32f_aligned16.cc | 75 -------- volk/lib/qa_16sc_deinterleave_32f_aligned16.h | 18 -- .../lib/qa_16sc_deinterleave_real_16s_aligned16.cc | 72 ------- volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h | 18 -- .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 124 ------------ volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h | 18 -- volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc | 70 ------- volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h | 18 -- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 81 -------- 
volk/lib/qa_16sc_magnitude_16s_aligned16.h | 18 -- volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 131 ------------- volk/lib/qa_16sc_magnitude_32f_aligned16.h | 18 -- volk/lib/qa_16u_byteswap_aligned16.cc | 71 ------- volk/lib/qa_16u_byteswap_aligned16.h | 18 -- volk/lib/qa_32f_accumulator_aligned16.cc | 57 ------ volk/lib/qa_32f_accumulator_aligned16.h | 18 -- volk/lib/qa_32f_add_aligned16.cc | 123 ------------ volk/lib/qa_32f_add_aligned16.h | 18 -- .../qa_32f_calc_spectral_noise_floor_aligned16.cc | 60 ------ .../qa_32f_calc_spectral_noise_floor_aligned16.h | 18 -- volk/lib/qa_32f_convert_16s_aligned16.cc | 71 ------- volk/lib/qa_32f_convert_16s_aligned16.h | 18 -- volk/lib/qa_32f_convert_16s_unaligned16.cc | 71 ------- volk/lib/qa_32f_convert_16s_unaligned16.h | 18 -- volk/lib/qa_32f_convert_32s_aligned16.cc | 71 ------- volk/lib/qa_32f_convert_32s_aligned16.h | 18 -- volk/lib/qa_32f_convert_32s_unaligned16.cc | 71 ------- volk/lib/qa_32f_convert_32s_unaligned16.h | 18 -- volk/lib/qa_32f_convert_64f_aligned16.cc | 61 ------ volk/lib/qa_32f_convert_64f_aligned16.h | 18 -- volk/lib/qa_32f_convert_64f_unaligned16.cc | 61 ------ volk/lib/qa_32f_convert_64f_unaligned16.h | 18 -- volk/lib/qa_32f_convert_8s_aligned16.cc | 71 ------- volk/lib/qa_32f_convert_8s_aligned16.h | 18 -- volk/lib/qa_32f_convert_8s_unaligned16.cc | 71 ------- volk/lib/qa_32f_convert_8s_unaligned16.h | 18 -- volk/lib/qa_32f_divide_aligned16.cc | 133 ------------- volk/lib/qa_32f_divide_aligned16.h | 18 -- volk/lib/qa_32f_dot_prod_aligned16.cc | 183 ------------------ volk/lib/qa_32f_dot_prod_aligned16.h | 18 -- volk/lib/qa_32f_dot_prod_unaligned16.cc | 190 ------------------ volk/lib/qa_32f_dot_prod_unaligned16.h | 18 -- volk/lib/qa_32f_interleave_16sc_aligned16.cc | 76 -------- volk/lib/qa_32f_interleave_16sc_aligned16.h | 18 -- volk/lib/qa_32f_interleave_32fc_aligned16.cc | 63 ------ volk/lib/qa_32f_interleave_32fc_aligned16.h | 18 -- volk/lib/qa_32f_max_aligned16.cc | 70 ------- 
volk/lib/qa_32f_max_aligned16.h | 18 -- volk/lib/qa_32f_min_aligned16.cc | 70 ------- volk/lib/qa_32f_min_aligned16.h | 18 -- volk/lib/qa_32f_multiply_aligned16.cc | 123 ------------ volk/lib/qa_32f_multiply_aligned16.h | 18 -- volk/lib/qa_32f_normalize_aligned16.cc | 79 -------- volk/lib/qa_32f_normalize_aligned16.h | 18 -- volk/lib/qa_32f_power_aligned16.cc | 95 --------- volk/lib/qa_32f_power_aligned16.h | 18 -- volk/lib/qa_32f_sqrt_aligned16.cc | 128 ------------ volk/lib/qa_32f_sqrt_aligned16.h | 18 -- volk/lib/qa_32f_stddev_aligned16.cc | 75 -------- volk/lib/qa_32f_stddev_aligned16.h | 18 -- volk/lib/qa_32f_stddev_and_mean_aligned16.cc | 76 -------- volk/lib/qa_32f_stddev_and_mean_aligned16.h | 18 -- volk/lib/qa_32f_subtract_aligned16.cc | 70 ------- volk/lib/qa_32f_subtract_aligned16.h | 18 -- volk/lib/qa_32f_sum_of_poly_aligned16.cc | 142 -------------- volk/lib/qa_32f_sum_of_poly_aligned16.h | 18 -- volk/lib/qa_32fc_32f_multiply_aligned16.cc | 75 -------- volk/lib/qa_32fc_32f_multiply_aligned16.h | 18 -- volk/lib/qa_32fc_32f_power_32fc_aligned16.cc | 83 -------- volk/lib/qa_32fc_32f_power_32fc_aligned16.h | 18 -- volk/lib/qa_32fc_atan2_32f_aligned16.cc | 76 -------- volk/lib/qa_32fc_atan2_32f_aligned16.h | 18 -- volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc | 138 ------------- volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h | 18 -- volk/lib/qa_32fc_deinterleave_32f_aligned16.cc | 64 ------ volk/lib/qa_32fc_deinterleave_32f_aligned16.h | 18 -- volk/lib/qa_32fc_deinterleave_64f_aligned16.cc | 64 ------ volk/lib/qa_32fc_deinterleave_64f_aligned16.h | 18 -- .../lib/qa_32fc_deinterleave_real_16s_aligned16.cc | 61 ------ volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h | 18 -- .../lib/qa_32fc_deinterleave_real_32f_aligned16.cc | 61 ------ volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h | 18 -- .../lib/qa_32fc_deinterleave_real_64f_aligned16.cc | 61 ------ volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h | 18 -- volk/lib/qa_32fc_dot_prod_aligned16.cc | 
214 --------------------- volk/lib/qa_32fc_dot_prod_aligned16.h | 20 -- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 80 -------- volk/lib/qa_32fc_magnitude_16s_aligned16.h | 18 -- volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 80 -------- volk/lib/qa_32fc_magnitude_32f_aligned16.h | 18 -- volk/lib/qa_32fc_multiply_aligned16.cc | 98 ---------- volk/lib/qa_32fc_multiply_aligned16.h | 18 -- volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc | 64 ------ volk/lib/qa_32fc_power_spectrum_32f_aligned16.h | 18 -- volk/lib/qa_32fc_square_dist_aligned16.cc | 91 --------- volk/lib/qa_32fc_square_dist_aligned16.h | 18 -- .../qa_32fc_square_dist_scalar_mult_aligned16.cc | 96 --------- .../qa_32fc_square_dist_scalar_mult_aligned16.h | 18 -- volk/lib/qa_32s_and_aligned16.cc | 70 ------- volk/lib/qa_32s_and_aligned16.h | 18 -- volk/lib/qa_32s_convert_32f_aligned16.cc | 61 ------ volk/lib/qa_32s_convert_32f_aligned16.h | 18 -- volk/lib/qa_32s_convert_32f_unaligned16.cc | 61 ------ volk/lib/qa_32s_convert_32f_unaligned16.h | 18 -- volk/lib/qa_32s_or_aligned16.cc | 70 ------- volk/lib/qa_32s_or_aligned16.h | 18 -- volk/lib/qa_32u_byteswap_aligned16.cc | 60 ------ volk/lib/qa_32u_byteswap_aligned16.h | 18 -- volk/lib/qa_64f_convert_32f_aligned16.cc | 61 ------ volk/lib/qa_64f_convert_32f_aligned16.h | 18 -- volk/lib/qa_64f_convert_32f_unaligned16.cc | 61 ------ volk/lib/qa_64f_convert_32f_unaligned16.h | 18 -- volk/lib/qa_64f_max_aligned16.cc | 61 ------ volk/lib/qa_64f_max_aligned16.h | 18 -- volk/lib/qa_64f_min_aligned16.cc | 61 ------ volk/lib/qa_64f_min_aligned16.h | 18 -- volk/lib/qa_64u_byteswap_aligned16.cc | 60 ------ volk/lib/qa_64u_byteswap_aligned16.h | 18 -- volk/lib/qa_8s_convert_16s_aligned16.cc | 64 ------ volk/lib/qa_8s_convert_16s_aligned16.h | 18 -- volk/lib/qa_8s_convert_16s_unaligned16.cc | 64 ------ volk/lib/qa_8s_convert_16s_unaligned16.h | 18 -- volk/lib/qa_8s_convert_32f_aligned16.cc | 72 ------- volk/lib/qa_8s_convert_32f_aligned16.h | 18 -- 
volk/lib/qa_8s_convert_32f_unaligned16.cc | 64 ------ volk/lib/qa_8s_convert_32f_unaligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_16s_aligned16.cc | 68 ------- volk/lib/qa_8sc_deinterleave_16s_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_32f_aligned16.cc | 135 ------------- volk/lib/qa_8sc_deinterleave_32f_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc | 65 ------- volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 139 ------------- volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h | 18 -- volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc | 61 ------ volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h | 18 -- .../qa_8sc_multiply_conjugate_16sc_aligned16.cc | 87 --------- .../lib/qa_8sc_multiply_conjugate_16sc_aligned16.h | 18 -- .../qa_8sc_multiply_conjugate_32fc_aligned16.cc | 87 --------- .../lib/qa_8sc_multiply_conjugate_32fc_aligned16.h | 18 -- volk/lib/qa_volk.cc | 211 -------------------- volk/lib/qa_volk.h | 36 ---- volk/lib/test_all.cc | 82 -------- 158 files changed, 8144 deletions(-) delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.cc delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.h delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.cc delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.h delete mode 100644 volk/lib/qa_16s_max_star_aligned16.cc delete mode 100644 volk/lib/qa_16s_max_star_aligned16.h delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.cc delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.h delete mode 100644 
volk/lib/qa_16sc_deinterleave_32f_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.cc delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.h delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.cc delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.h delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.cc delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.h delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.cc delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.h delete mode 100644 volk/lib/qa_32f_add_aligned16.cc delete mode 100644 volk/lib/qa_32f_add_aligned16.h delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.h delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.h delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.h delete 
mode 100644 volk/lib/qa_32f_convert_8s_aligned16.cc delete mode 100644 volk/lib/qa_32f_convert_8s_aligned16.h delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.cc delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.h delete mode 100644 volk/lib/qa_32f_divide_aligned16.cc delete mode 100644 volk/lib/qa_32f_divide_aligned16.h delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.cc delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.h delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.cc delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.h delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.cc delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.h delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.cc delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.h delete mode 100644 volk/lib/qa_32f_max_aligned16.cc delete mode 100644 volk/lib/qa_32f_max_aligned16.h delete mode 100644 volk/lib/qa_32f_min_aligned16.cc delete mode 100644 volk/lib/qa_32f_min_aligned16.h delete mode 100644 volk/lib/qa_32f_multiply_aligned16.cc delete mode 100644 volk/lib/qa_32f_multiply_aligned16.h delete mode 100644 volk/lib/qa_32f_normalize_aligned16.cc delete mode 100644 volk/lib/qa_32f_normalize_aligned16.h delete mode 100644 volk/lib/qa_32f_power_aligned16.cc delete mode 100644 volk/lib/qa_32f_power_aligned16.h delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.cc delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.h delete mode 100644 volk/lib/qa_32f_stddev_aligned16.cc delete mode 100644 volk/lib/qa_32f_stddev_aligned16.h delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.cc delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.h delete mode 100644 volk/lib/qa_32f_subtract_aligned16.cc delete mode 100644 volk/lib/qa_32f_subtract_aligned16.h delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.cc delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.h delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.cc 
delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.h delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.h delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.cc delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.h delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.cc delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.h delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.h delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.cc delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.h delete mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc delete mode 100644 
volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h delete mode 100644 volk/lib/qa_32s_and_aligned16.cc delete mode 100644 volk/lib/qa_32s_and_aligned16.h delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_32s_or_aligned16.cc delete mode 100644 volk/lib/qa_32s_or_aligned16.h delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.cc delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.h delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_64f_max_aligned16.cc delete mode 100644 volk/lib/qa_64f_max_aligned16.h delete mode 100644 volk/lib/qa_64f_min_aligned16.cc delete mode 100644 volk/lib/qa_64f_min_aligned16.h delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.cc delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.h delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.cc delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.h delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.cc delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.h delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.cc delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.h delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.cc delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc delete mode 100644 
volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h delete mode 100644 volk/lib/qa_volk.cc delete mode 100644 volk/lib/qa_volk.h delete mode 100644 volk/lib/test_all.cc (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 63df85244..bbc993fa2 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -110,7 +110,6 @@ endif # ---------------------------------------------------------------- noinst_HEADERS = \ volk_init.h \ - qa_volk.h \ qa_utils.h \ assembly.h diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc deleted file mode 100644 index 6215f4a64..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h deleted file mode 100644 index ef813d96f..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc deleted file mode 100644 index 46c2e48ac..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_unaligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h deleted file mode 100644 index aeb04f770..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc deleted file mode 100644 index 8225aa0cf..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h deleted file mode 100644 index 2e409d0cc..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H - -#include -#include - -class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc deleted file mode 100644 index e6ce5030e..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h deleted file mode 100644 index 4b2fe9e42..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H - -#include -#include - -class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc deleted file mode 100644 index c6f828ba6..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include -#include -#include -#include -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - - -void qa_16s_max_star_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 6400; - const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[1] __attribute__ ((aligned (16))); - - short output1[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h deleted file mode 100644 index 119f87c4d..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H - -#include -#include - -class qa_16s_max_star_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc deleted file mode 100644 index 0a58570e2..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_horizontal_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - -void qa_16s_max_star_horizontal_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 32; - const int ITERS = 1; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[vlen>>1] __attribute__ ((aligned (16))); - - short output1[vlen>>1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))); - - short minus0 = ((short) (rand() - (RAND_MAX/2))); - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_horizontal_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - /* volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/ - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < (vlen >> 1); ++i) { - // printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - - } - for(int i = 0; i < (vlen >> 1); ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } - } - - -#endif - diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h deleted file mode 100644 index 9f9757253..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H - -#include -#include - -class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index aadc39067..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,89 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_sse21[vlen] __attribute__ ((aligned (16))); - int16_t output_orc[vlen] __attribute__ ((aligned (16))); - int16_t output_orc1[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3"); - } - end = 
clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse21[i]); - - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); - - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 995ab5b34..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 13151be13..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16sc_deinterleave_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - float output_sse21[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_orc1[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h deleted file mode 100644 index fea3b6c2d..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index c67064ea6..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - // printf("%d = generic... %d, sse2... %d, ssse3... 
%d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index ebb70b97a..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index f86f03b88..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE */ - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE4_1 */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index e83426473..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index 803caaa2d..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - int8_t output_orc[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 04e5511e5..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include -#include - -class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc deleted file mode 100644 index 7fbdd8620..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_orc[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h deleted file mode 100644 index 4664b70f4..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H - -#include -#include - -class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc deleted file mode 100644 index 54cc2ba6e..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_32f_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2)); - } - printf("16sc_magnitude_32f_aligned\n"); - - float scale = 32768.0; - for(int i = 0; i < vlen; ++i) { - float re = (float)(input0[i].real())/scale; - float im = 
(float)(input0[i].imag())/scale; - output_known[i] = sqrt(re*re + im*im); - } - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - /* - for(int i = 0; i < 100; ++i) { - printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag()); - printf("generic... %f == %f\n", output_generic[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4); - } -} - -#else - -void qa_16sc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("16sc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); -/* start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); -*/ - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); -// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h deleted file mode 100644 index 0c25673ea..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H - -#include -#include - -class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc deleted file mode 100644 index c2295968b..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint16_t output0[vlen] __attribute__ ((aligned (16))); - uint16_t output01[vlen] __attribute__ ((aligned (16))); - uint16_t output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint16_t)); - memcpy(output02, output0, vlen*sizeof(uint16_t)); - - printf("16u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output02, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h deleted file mode 100644 index e11b23e3f..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H - -#include -#include - -class qa_16u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc deleted file mode 100644 index 0defef283..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_accumulator_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_accumulator_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float accumulator_generic; - float accumulator_sse; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_accumulator_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4); -} - -#endif diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h deleted file mode 100644 index 0004d3ff0..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H -#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H - -#include -#include - -class qa_32f_accumulator_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc deleted file mode 100644 index a183d4d85..000000000 --- a/volk/lib/qa_32f_add_aligned16.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . 
- */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_add_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - output_known[i] = input0[i] + input1[i]; - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_add_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h deleted file mode 100644 index 58e2a151c..000000000 --- a/volk/lib/qa_32f_add_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H -#define INCLUDED_QA_32F_ADD_ALIGNED16_H - -#include -#include - -class qa_32f_add_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_add_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc deleted file mode 100644 index 5d6987333..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[1] __attribute__ ((aligned (16))); - float output01[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_calc_spectral_noise_floor_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h deleted file mode 100644 index c5dce2c4b..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H -#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H - -#include -#include - -class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc deleted file mode 100644 index 3e2452e68..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %d, sse... %d sse2... 
%d\n", i, output_generic[i], output_sse[i], output_sse2[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h deleted file mode 100644 index fce1eb417..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H - -#include -#include - -class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc deleted file mode 100644 index e016b7ff7..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h deleted file mode 100644 index 492bc80e6..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc deleted file mode 100644 index abceb52fb..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_32s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h deleted file mode 100644 index 97d854463..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H - -#include -#include - -class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc deleted file mode 100644 index 90f84b56f..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_32s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h deleted file mode 100644 index 5d662d86d..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc deleted file mode 100644 index 1d0754ac9..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h deleted file mode 100644 index 41eb3e094..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H - -#include -#include - -class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc deleted file mode 100644 index 6f7d5066d..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_64f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h deleted file mode 100644 index 4b144f033..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc deleted file mode 100644 index 6a53629b5..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h deleted file mode 100644 index 68a523f34..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H - -#include -#include - -class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc deleted file mode 100644 index fbc5c20e6..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h deleted file mode 100644 index 88d4ff42a..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H - -#include -#include - -class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc deleted file mode 100644 index f2a1b9e7f..000000000 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . 
- */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_divide_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output1[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - output_known[i] = input0[i] / input1[i]; - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]); - } -} - -#else - -void qa_32f_divide_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h deleted file mode 100644 index 79d5ae4b8..000000000 --- a/volk/lib/qa_32f_divide_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H -#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H - -#include -#include - -class qa_32f_divide_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc deleted file mode 100644 index 98c1f2d99..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.cc +++ /dev/null @@ -1,183 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_aligned16::t1() { - const int vlen = 2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 
16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... 
sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h deleted file mode 100644 index 6931a9e98..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H - -#include -#include - -class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc deleted file mode 100644 index 8e97d4249..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.cc +++ /dev/null @@ -1,190 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * 
taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_unaligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - input = &input[1]; // Make sure the buffer is unaligned - taps = &taps[1]; // Make sure the buffer is unaligned - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(&input[-1]); - free(&taps[-1]); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h deleted file mode 100644 index e8bad07fe..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H - -#include -#include - -class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc deleted file mode 100644 index a7ae60780..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32f_interleave_16sc_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_16sc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex output_generic[vlen] __attribute__ ((aligned (16))); - std::complex output_sse[vlen] __attribute__ ((aligned (16))); - std::complex output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_interleave_16sc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h deleted file mode 100644 index 8d2914817..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H - -#include -#include - -class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc deleted file mode 100644 index 333b6fce8..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_interleave_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_32fc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex output_generic[vlen] __attribute__ ((aligned (16))); - std::complex output_sse[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_interleave_32fc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h deleted file mode 100644 index cba518d37..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H - -#include -#include - -class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc deleted file mode 100644 index 98f8ce9bc..000000000 --- a/volk/lib/qa_32f_max_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_max_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h deleted file mode 100644 index d535479f4..000000000 --- a/volk/lib/qa_32f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H -#define INCLUDED_QA_32F_MAX_ALIGNED16_H - -#include -#include - -class qa_32f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc deleted file mode 100644 index 798b47c53..000000000 --- a/volk/lib/qa_32f_min_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_min_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h deleted file mode 100644 index 90961ac92..000000000 --- a/volk/lib/qa_32f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H -#define INCLUDED_QA_32F_MIN_ALIGNED16_H - -#include -#include - -class qa_32f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc deleted file mode 100644 index aa17cd62e..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . 
- */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_multiply_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - output_known[i] = input0[i] * input1[i]; - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_multiply_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h deleted file mode 100644 index 7032a2ad4..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H - -#include -#include - -class qa_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc deleted file mode 100644 index 0da43ecff..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_normalize_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_normalize_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 320001; - const int ITERS = 100; - - float* output0; - float* output01; - float* output02; - ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); - ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); - ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float)); - - for(int i = 0; i < vlen; ++i) { - output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(float)); - memcpy(output02, output0, vlen*sizeof(float)); - printf("32f_normalize_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - // printf("%e...%e\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4); - } - - free(output0); - free(output01); - free(output02); -} - -#endif diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h deleted file mode 100644 index 7c421eb82..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H -#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H - -#include -#include - -class qa_32f_normalize_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc deleted file mode 100644 index 1b331daeb..000000000 --- a/volk/lib/qa_32f_power_aligned16.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32f_power_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float* input; - int i; - - float* result_generic; - float* result_sse; - float* result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, vlen * 
sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float)); - - random_floats((float*)input, vlen); - - const float power = 3; - - printf("32f_power_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - - for(i = 0; i < vlen; i++){ - //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32f_power_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h deleted file mode 100644 index d45df4e56..000000000 --- a/volk/lib/qa_32f_power_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H -#define INCLUDED_QA_32F_POWER_ALIGNED16_H - -#include -#include - -class qa_32f_power_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_power_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc deleted file mode 100644 index c216ce5d5..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * . - */ - -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_sqrt_aligned16::t1() { - printf("sse not available... 
no test performed\n"); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast(RAND_MAX)); - output_known[i] = sqrt(input0[i]); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f\n", input0[i]); - printf("generic... %f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4); - } -} - -#else - -void qa_32f_sqrt_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. 
- for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast(RAND_MAX)); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h deleted file mode 100644 index e4b99d981..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H -#define INCLUDED_QA_32F_SQRT_ALIGNED16_H - -#include -#include - -class qa_32f_sqrt_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc deleted file mode 100644 index 5934d70df..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include 
-#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_stddev_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32f_stddev_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean = 0; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - mean += input0[i]; - } - mean /= static_cast(vlen); - - printf("32f_stddev_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h deleted file mode 100644 index 7f8d7a5fc..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H - -#include -#include - -class qa_32f_stddev_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc deleted file mode 100644 index 78c701d78..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_stddev_and_mean_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_stddev_and_mean_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean_generic; - float mean_sse; - float mean_sse4_1; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_stddev_and_mean_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h deleted file mode 100644 index e08bd249a..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H - -#include -#include - -class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc deleted file mode 100644 index 1e2210203..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_subtract_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_subtract_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - float output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("32f_subtract_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h deleted file mode 100644 index 97c14f129..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H -#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H - -#include -#include - -class qa_32f_subtract_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc deleted file mode 100644 index 494776357..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc +++ /dev/null @@ -1,142 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define SNR 30.0 -#define CENTER -4.0 -#define CUTOFF -5.595 -#define ERR_DELTA (1e-4) -#define NUM_ITERS 100000 -#define VEC_LEN 64 -static float uniform() { - return ((float) rand() / RAND_MAX); // uniformly (0, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * -SNR/2.0; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32f_sum_of_poly_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32f_sum_of_poly_aligned16::t1(){ - int i = 0; - - volk_environment_init(); - int ret; - - const int vlen = VEC_LEN; - float cutoff = CUTOFF; - - float* center_point_array; - float* target; - float* target_generic; - float* src0 ; - - - ret = posix_memalign((void**)¢er_point_array, 16, 24); - ret = posix_memalign((void**)&target, 16, 4); - ret = posix_memalign((void**)&target_generic, 16, 4); - ret = posix_memalign((void**)&src0, 16, (vlen << 2)); - - - random_floats((float*)src0, vlen); - - float a = (float)CENTER; - float etoa = expf(a); - center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 + - (-4.0 * a * a * a)/24.0 + - (3.0 * a * a)/6.0 + - (-2.0 * a)/2.0 + - (1.0)) * etoa; - center_point_array[1] = (//(-10.0 * a * a * a)/120.0 + - (6.0 * a * a)/24.0 + - (-3.0 * a)/6.0 + - (1.0/2.0)) * etoa; - center_point_array[2] = (//(10.0 * a * a)/120.0 + - (-4.0 * a)/24.0 + - (1.0/6.0)) * etoa; - center_point_array[3] = (//(-5.0 * a)/120.0 + - (1.0/24.0)) * etoa; - //center_point_array[4] = ((1.0)/120.0) * etoa; - center_point_array[4] = (//(a * a * a * a * a)/120.0 + - (a * a * a * a)/24.0 + - (a * a * a)/-6.0 + - (a * a)/2.0 + - -a + 1.0) * etoa; - - printf("32f_sum_of_poly_aligned16\n"); - - clock_t start, end; - double total; - - float my_sum = 0.0; - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - float sum = 0.0; - for(int l = 0; l < vlen; ++l) { - - sum += expf(src0[l]); - - } - my_sum = sum; - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("exp time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - - volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic"); - - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32f_sum_of_poly_aligned16_manual(target, src0, 
center_point_array, &cutoff, vlen << 2, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 approx time: %f\n", total); - - - - printf("exp: %f, sse3: %f\n", my_sum, target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA); - - - free(center_point_array); - free(target); - free(target_generic); - free(src0); - - -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h deleted file mode 100644 index 67a347f9a..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H -#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H - -#include -#include - -class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc deleted file mode 100644 index b80e0e008..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#define TOLERANCE (1e-4) - -void qa_32fc_32f_multiply_aligned16(void) { - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - float * taps; - int i; - std::vector archs; - archs.push_back("generic"); -#ifdef LV_HAVE_SSE3 - archs.push_back("sse3"); -#endif -#ifdef LV_HAVE_ORC - archs.push_back("orc"); -#endif - - std::vector* > results; - - ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - - for(i=0; i < archs.size(); i++) { - std::complex 
*ptr; - ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float)); - if(ret) { - printf("Couldn't allocate memory\n"); - exit(1); - } - results.push_back(ptr); - } - - random_floats((float*)input, vlen * 2); - random_floats(taps, vlen); - - printf("32fc_32f_multiply_aligned16\n"); - - for(i=0; i < archs.size(); i++) { - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str()); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("%s_time: %f\n", archs[i].c_str(), total); - } - - for(i=0; i < vlen; i++) { - int j = 1; - for(j; j < archs.size(); j++) { - assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA); - } - } - - free(input); - free(taps); - for(i=0; i < archs.size(); i++) { - free(results[i]); - } -} diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h deleted file mode 100644 index fc3b3eeb2..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H - -#include -#include - -class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc deleted file mode 100644 index 64ea65da9..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), 
std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1.5e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32fc_32f_power_32fc_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - int i; - - std::complex* result_generic; - std::complex* result_sse; - - ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float)); - - random_floats((float*)input, vlen * 2); - - const float power = 3.2; - - printf("32fc_32f_power_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32fc_32f_power_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h deleted file mode 100644 index 464b7b7cc..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H - -#include -#include - -class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc deleted file mode 100644 index c55ab5aa0..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_atan2_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_atan2_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_atan2_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h deleted file mode 100644 index 9c4dc209a..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc deleted file mode 100644 index 2f9a30395..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include -#include -#include -#include -#include - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse - -#if LV_HAVE_SSE && LV_HAVE_64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - 
std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#elif LV_HAVE_SSE && LV_HAVE_32 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - 
volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#else - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h deleted file mode 100644 index 507b1769b..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H - -#include -#include - -class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 72e084c05..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h deleted file mode 100644 index 78660e6ad..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc deleted file mode 100644 index 89770c236..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_generic1[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - double output_sse21[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h deleted file mode 100644 index f924b9752..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 7472476f7..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 68b80f27d..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 5cbdc49b3..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 765450bb6..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc deleted file mode 100644 index 4147e30ae..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h deleted file mode 100644 index 3e55fb812..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H - -#include -#include - -class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc deleted file mode 100644 index bcf9ea954..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.cc +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include -#include -#include -#include - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - - - -#if LV_HAVE_SSE3 -void qa_32fc_dot_prod_aligned16::t1() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* 
result_generic; - std::complex* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex(0,0); - result_sse3[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_32 -void qa_32fc_dot_prod_aligned16::t2() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex(0,0); - result_sse3[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_32_time: %f\n", total); - - printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t2() { - printf("sse_32 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_64 - -void qa_32fc_dot_prod_aligned16::t3() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex(0,0); - result_sse3[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_64_time: %f\n", total); - - printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t3() { - printf("sse_64 not available... 
no test performed\n"); -} - - - -#endif diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h deleted file mode 100644 index 4b360db27..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H - -#include -#include - -class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); - void t2 (); - void t3 (); -}; - - -#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc deleted file mode 100644 index c718b6b71..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_orc[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - // printf("generic... %i, sse3... %i, orc... 
%i\n", output_generic[i], output_sse3[i], output_orc[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h deleted file mode 100644 index ffdf1dd9e..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H - -#include -#include - -class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc deleted file mode 100644 index 1d475fb86..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_orc[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - printf("32fc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h deleted file mode 100644 index a2881308c..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc deleted file mode 100644 index 022b58ad6..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include -#include - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 -void qa_32fc_multiply_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 100000; 
- - int i; - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse3; - std::complex* result_orc; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float)); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_multiply_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); - assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - free(result_orc); - -} -#else -void qa_32fc_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h deleted file mode 100644 index c8abaa8fe..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H - -#include -#include - -class qa_32fc_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc deleted file mode 100644 index 1444c78a9..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse3 - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - const float scalar = vlen; - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); - } - - printf("32fc_power_spectrum_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse33... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); - } -} - -#endif diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h deleted file mode 100644 index d991223f3..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H - -#include -#include - -class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc deleted file mode 100644 index d9ead8495..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include -#include -#include -#include - -#define ERR_DELTA (1e-4) -#define NUM_ITERS 10000000 -#define VEC_LEN 64 -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex* src0 ; - std::complex* points; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - - printf("32fc_square_dist_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(; i < vlen; ++i) { - //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h deleted file mode 100644 index 9d365d8b0..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H -#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H - -#include -#include - -class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - 
private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc deleted file mode 100644 index f923d1d5c..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define ERR_DELTA .0001 -#define NUM_ITERS 10000000 -#define VEC_LEN 64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - printf("sse3 not available... no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex* src0 ; - std::complex* points; - float scalar; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - random_floats(&scalar, 1); - - printf("32fc_square_dist_scalar_mult_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen 
<< 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(i = 0; i < vlen; ++i) { - printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h deleted file mode 100644 index ac4e3c45b..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H -#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H - -#include -#include - -class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc deleted file mode 100644 index d20682147..000000000 --- a/volk/lib/qa_32s_and_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_and_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32s_and_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - int32_t output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_and_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h deleted file mode 100644 index dfcb47c63..000000000 --- a/volk/lib/qa_32s_and_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H -#define INCLUDED_QA_32S_AND_ALIGNED16_H - -#include -#include - -class qa_32s_and_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_and_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc deleted file mode 100644 index 07d799809..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h deleted file mode 100644 index efd2a2eea..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc deleted file mode 100644 index 2ec610ffb..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h deleted file mode 100644 index 5006f5fd8..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc deleted file mode 100644 index bebf779b0..000000000 --- a/volk/lib/qa_32s_or_aligned16.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_or_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32s_or_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - int32_t output02[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_or_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h deleted file mode 100644 index 9e949eb52..000000000 --- a/volk/lib/qa_32s_or_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H -#define INCLUDED_QA_32S_OR_ALIGNED16_H - -#include -#include - -class qa_32s_or_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_or_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */ diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc deleted file mode 100644 index 313c786b6..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint32_t output0[vlen] __attribute__ ((aligned (16))); - uint32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint32_t)); - printf("32u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h deleted file mode 100644 index 47bad4c3d..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H - -#include -#include - -class qa_32u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc deleted file mode 100644 index 7f9c4584a..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h deleted file mode 100644 index 95d79f73d..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc deleted file mode 100644 index 98aadbf4d..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h deleted file mode 100644 index 430327e81..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc deleted file mode 100644 index 76e755514..000000000 --- a/volk/lib/qa_64f_max_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_max_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h deleted file mode 100644 index 7cbd4d4c1..000000000 --- a/volk/lib/qa_64f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H -#define INCLUDED_QA_64F_MAX_ALIGNED16_H - -#include -#include - -class qa_64f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc deleted file mode 100644 index 4b70d2881..000000000 --- a/volk/lib/qa_64f_min_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_min_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); - } - printf("64f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h deleted file mode 100644 index a0e95395f..000000000 --- a/volk/lib/qa_64f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H -#define INCLUDED_QA_64F_MIN_ALIGNED16_H - -#include -#include - -class qa_64f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc deleted file mode 100644 index 20d012c9e..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint64_t output0[vlen] __attribute__ ((aligned (16))); - uint64_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint64_t)); - printf("64u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h deleted file mode 100644 index a4fa0c983..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H - -#include -#include - -class qa_64u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc deleted file mode 100644 index 8dd5f76ca..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h deleted file mode 100644 index 38739fc96..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H - -#include -#include - -class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc deleted file mode 100644 index 12c502d4b..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_unaligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h deleted file mode 100644 index d39fffc35..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H - -#include -#include - -class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc deleted file mode 100644 index f27e60552..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("orc_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h deleted file mode 100644 index 7f8401d42..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H - -#include -#include - -class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc deleted file mode 100644 index 43468b1b1..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_unaligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen+1] __attribute__ ((aligned (16))); - - float output_generic[vlen+1] __attribute__ ((aligned (16))); - float output_sse4_1[vlen+1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%e...%e\n", output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h deleted file mode 100644 index aad2f8c22..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H - -#include -#include - -class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index f753e1107..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_16s_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_11[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse4_11[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 9c99fed70..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 29073eed7..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - float output_sse14_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max((output_generic[i])*1e-4, 1e-4)); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max((output_generic[i])*1e-4, 1e-4)); - } -} - - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h deleted file mode 100644 index 63b5fdadb..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 4980c982a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - 
printf("sse4_1 not available... no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 02050926f..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 3c3f737a1..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,139 +0,0 @@ -#include -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex *input0; - - float* output_generic; - float* output_sse; - float* output_sse4_1; - - ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t)); - ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float)); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0); - } - - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", 
input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } - - free(input0); - free(output_generic); - free(output_sse); - free(output_sse4_1); -} - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 93338e488..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index a33d1bf30..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 92fc0dd4a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include -#include - -class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc deleted file mode 100644 index 216bf1cef..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t)); - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_16sc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex*)result_generic, (std::complex*)input, (std::complex*)taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex*)result_sse4_1, (std::complex*)input, (std::complex*)taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), 
std::imag(result_sse4_1[i])); - - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h deleted file mode 100644 index 0e78a5eca..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H - -#include -#include - -class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc deleted file mode 100644 index 4c707446e..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float)); - - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex*)input, (const std::complex*)taps, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex*)input, (const std::complex*)taps, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), 
std::imag(result_sse4_1[i])); - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h deleted file mode 100644 index eb9ae309c..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H - -#include -#include - -class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc deleted file mode 100644 index 8e7e59768..000000000 --- a/volk/lib/qa_volk.cc +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. 
- */ - -/* - * This class gathers together all the test cases for the example - * directory into a single test suite. As you create new test cases, - * add them here. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -CppUnit::TestSuite * -qa_volk::suite() -{ - CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); - - s->addTest(qa_16s_quad_max_star_aligned16::suite()); - s->addTest(qa_32fc_dot_prod_aligned16::suite()); - s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); - s->addTest(qa_32fc_square_dist_aligned16::suite()); - s->addTest(qa_32f_sum_of_poly_aligned16::suite()); - s->addTest(qa_32fc_index_max_aligned16::suite()); - s->addTest(qa_32f_index_max_aligned16::suite()); - s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite()); - s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite()); - s->addTest(qa_16s_branch_4_state_8_aligned16::suite()); - s->addTest(qa_16s_max_star_horizontal_aligned16::suite()); - s->addTest(qa_16s_max_star_aligned16::suite()); - s->addTest(qa_16s_add_quad_aligned16::suite()); - s->addTest(qa_32f_add_aligned16::suite()); - s->addTest(qa_32f_subtract_aligned16::suite()); - 
s->addTest(qa_32f_max_aligned16::suite()); - s->addTest(qa_32f_min_aligned16::suite()); - s->addTest(qa_64f_max_aligned16::suite()); - s->addTest(qa_64f_min_aligned16::suite()); - s->addTest(qa_32s_and_aligned16::suite()); - s->addTest(qa_32s_or_aligned16::suite()); - s->addTest(qa_32f_dot_prod_aligned16::suite()); - s->addTest(qa_32f_dot_prod_unaligned16::suite()); - s->addTest(qa_32f_fm_detect_aligned16::suite()); - //s->addTest(qa_32fc_32f_multiply_aligned16::suite()); - s->addTest(qa_32fc_multiply_aligned16::suite()); - s->addTest(qa_32f_divide_aligned16::suite()); - s->addTest(qa_32f_multiply_aligned16::suite()); - s->addTest(qa_32f_sqrt_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite()); - s->addTest(qa_32u_popcnt_aligned16::suite()); - s->addTest(qa_64u_popcnt_aligned16::suite()); - s->addTest(qa_16u_byteswap_aligned16::suite()); - s->addTest(qa_32u_byteswap_aligned16::suite()); - s->addTest(qa_64u_byteswap_aligned16::suite()); - s->addTest(qa_32f_normalize_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_16s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_64f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite()); - s->addTest(qa_32fc_magnitude_16s_aligned16::suite()); - s->addTest(qa_32fc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32f_interleave_16sc_aligned16::suite()); 
- s->addTest(qa_32f_interleave_32fc_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16s_convert_32f_aligned16::suite()); - s->addTest(qa_16s_convert_32f_unaligned16::suite()); - s->addTest(qa_16s_convert_8s_aligned16::suite()); - s->addTest(qa_16s_convert_8s_unaligned16::suite()); - s->addTest(qa_32f_convert_16s_aligned16::suite()); - s->addTest(qa_32f_convert_16s_unaligned16::suite()); - s->addTest(qa_32f_convert_32s_aligned16::suite()); - s->addTest(qa_32f_convert_32s_unaligned16::suite()); - s->addTest(qa_32f_convert_64f_aligned16::suite()); - s->addTest(qa_32f_convert_64f_unaligned16::suite()); - s->addTest(qa_32f_convert_8s_aligned16::suite()); - s->addTest(qa_32f_convert_8s_unaligned16::suite()); - s->addTest(qa_32s_convert_32f_aligned16::suite()); - s->addTest(qa_32s_convert_32f_unaligned16::suite()); - s->addTest(qa_64f_convert_32f_aligned16::suite()); - s->addTest(qa_64f_convert_32f_unaligned16::suite()); - s->addTest(qa_8s_convert_16s_aligned16::suite()); - s->addTest(qa_8s_convert_16s_unaligned16::suite()); - s->addTest(qa_8s_convert_32f_aligned16::suite()); - s->addTest(qa_8s_convert_32f_unaligned16::suite()); - s->addTest(qa_32fc_32f_power_32fc_aligned16::suite()); - s->addTest(qa_32f_power_aligned16::suite()); - s->addTest(qa_32fc_atan2_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite()); - s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite()); - s->addTest(qa_32f_accumulator_aligned16::suite()); - s->addTest(qa_32f_stddev_aligned16::suite()); - s->addTest(qa_32f_stddev_and_mean_aligned16::suite()); - - return s; -} diff --git a/volk/lib/qa_volk.h 
b/volk/lib/qa_volk.h deleted file mode 100644 index 43fa7faba..000000000 --- a/volk/lib/qa_volk.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU Example Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Example Public License for more details. - * - * You should have received a copy of the GNU Example Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#ifndef INCLUDED_QA_VOLK_H -#define INCLUDED_QA_VOLK_H - -#include - -//! collect all the tests for the example directory - -class qa_volk { - public: - //! return suite of tests for all of example directory - static CppUnit::TestSuite *suite (); -}; - -#endif /* INCLUDED_QA_VOLK_H */ diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc deleted file mode 100644 index 50ac08eab..000000000 --- a/volk/lib/test_all.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2002,2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -int -main (int argc, char **argv) -{ - - int opt = 0; - std::string xmlOutputFile(""); - - while( (opt = getopt(argc, argv, "o:")) != -1){ - switch(opt){ - case 'o': - if(optarg){ - xmlOutputFile.assign(optarg); - } - else{ - std::cerr << "No xml file output specified for -o" << std::endl; - exit(EXIT_FAILURE); - } - break; - - default: /* '?' */ - fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n", - argv[0]); - exit(EXIT_FAILURE); - } - - } - - CppUnit::TextUi::TestRunner runner; - - runner.addTest (qa_volk::suite ()); - - bool was_successful = false; - if(!xmlOutputFile.empty()){ - std::ofstream xmlOutput(xmlOutputFile.c_str()); - if(xmlOutput.is_open()){ - runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput)); - - was_successful = runner.run("", false, true, false); - } - xmlOutput.close(); - } - else{ - was_successful = runner.run ("", false); - } - - return was_successful ? 0 : 1; -} -- cgit From f832c9789be9fec46e211be4fb2355013d19c000 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Fri, 21 Jan 2011 18:24:02 -0800 Subject: Volk: Small changes to speed things up. 
--- volk/lib/qa_utils.cc | 2 +- volk/lib/testqa.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 67ce5ddef..9cafd459f 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -32,7 +32,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { if(type.size == 8) random_floats((double *)data, n); else random_floats((float *)data, n); } else { - float int_max = pow(2, type.size*8); + float int_max = float(uint64_t(2) << (type.size*8)); if(type.is_signed) int_max /= 2.0; for(int i=0; i((RAND_MAX/2))) * int_max; diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 9f4934dc0..4cef7b443 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); -- cgit From b0a23e876fe0f92afb2c55fd4fbce6427e9598d8 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 25 Jan 2011 15:06:23 -0800 Subject: Volk: doesn't test a routine if no valid architectures other than generic are found --- volk/lib/qa_utils.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 9cafd459f..6a6f87d85 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -288,6 +288,11 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //first let's get a list of available 
architectures for the test std::vector arch_list = get_arch_list(archs); + if(arch_list.size() < 2) { + std::cout << "no architectures to test" << std::endl; + return false; + } + //now we have to get a function signature by parsing the name std::vector inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); -- cgit From e979880d446949b2d2a93087011579c383369819 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 13 Jan 2011 18:57:48 +0000 Subject: Volk: QA util has proper free(). --- volk/lib/qa_utils.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 6a6f87d85..e85e2c1bc 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -309,10 +309,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //for(int i=0; i inbuffs; - + std::vector free_buffs; //this is just a list of void*'s that i'll have to free later. + //we need it because we dupe void*s in test_data below. 
make_buffer_for_signature(inbuffs, inputsig, vlen); for(int i=0; i arch_buffs; for(int j=0; j +#include #include BOOST_AUTO_TEST_CASE(volk_test_all) { -- cgit From e34a484084a5224ec3412bd7d6c6f285301f5d43 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 26 Jan 2011 15:47:56 -0800 Subject: Volk: renamed volk_32fc_32f_power_32fc_a16 to volk_32fc_s32f_power_32fc_a16 --- volk/lib/testqa.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index e9734411b..f33670856 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -29,7 +29,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); -- cgit From 6503e3b21978b71908400c994148836bec4a97b9 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Sun, 30 Jan 2011 12:35:07 -0500 Subject: volk: Updating build structure to work when orc is not installed. Distcheck passes for me if liborc is installed or not. --- volk/lib/Makefile.am | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 6f3d7fd86..af7c7f335 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -45,7 +45,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ # list of programs run by "make check" and "make distcheck" -TESTS = testqa +#TESTS = testqa #orc stuff gets built in the ORC directory conditional to ORC being enabled. 
#it gets linked in during the build of libvolk as an added library. #there might be a better way to do this. @@ -77,7 +77,7 @@ libvolk_la_SOURCES = \ volk_orc_LDFLAGS = \ $(ORC_LDFLAGS) \ -lorc-0.4 - + volk_orc_LIBADD = \ ../orc/libvolk_orc.la @@ -103,7 +103,6 @@ endif #libvolk_qa_la_LIBADD = \ # libvolk.la \ # libvolk_runtime.la - # ---------------------------------------------------------------- # headers that don't get installed -- cgit From 736874202f15222fa3ec10ceeb1815e8a595ed3a Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 2 Feb 2011 13:55:15 -0500 Subject: volk: cleaning up makefile issues after merge. --- volk/lib/Makefile.am | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index af7c7f335..3e5502369 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -57,7 +57,8 @@ lib_LTLIBRARIES = \ EXTRA_DIST = \ volk_mktables.c \ volk_rank_archs.h \ - volk_proccpu_sim.c + volk_proccpu_sim.c \ + gcc_x86_cpuid.h # ---------------------------------------------------------------- # The main library @@ -109,8 +110,7 @@ endif # ---------------------------------------------------------------- noinst_HEADERS = \ volk_init.h \ - qa_utils.h \ - assembly.h + qa_utils.h # ---------------------------------------------------------------- # Our test program -- cgit From b806f6e95cd917e54884841c8e7928204ecd78f8 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Wed, 2 Feb 2011 14:21:46 -0500 Subject: volk: updating to readd unaligned dot product under new name scheme. 
--- volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc | 138 --------------- volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h | 18 -- volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc | 138 +++++++++++++++ volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h | 18 ++ volk/lib/qa_volk.cc | 213 ----------------------- volk/lib/testqa.cc | 1 + 6 files changed, 157 insertions(+), 369 deletions(-) delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h delete mode 100644 volk/lib/qa_volk.cc (limited to 'volk/lib') diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc deleted file mode 100644 index a0680bab6..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include -#include -#include -#include -#include - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse - -#if LV_HAVE_SSE && LV_HAVE_64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_unaligned::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); 
- ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse"); - - printf("32fc_conjugate_dot_prod_unaligned\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#elif LV_HAVE_SSE && LV_HAVE_32 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_unaligned::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex* input; - std::complex* taps; - - std::complex* result_generic; - std::complex* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex(0,0); - result[0] = std::complex(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse_32"); - - printf("32fc_conjugate_dot_prod_unaligned\n"); - printf("generic: %f +i%f ... 
sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#else - -void qa_32fc_conjugate_dot_prod_unaligned::t1() { - printf("sse not available... no test performed\n"); -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h deleted file mode 100644 index 7aead53a1..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H -#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H - -#include -#include - -class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */ diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc new file mode 100644 index 000000000..fefdf06ee --- /dev/null +++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-4) + +//test for sse + +#if LV_HAVE_SSE && LV_HAVE_64 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void 
qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse"); + + printf("32fc_x2_conjugate_dot_prod_32fc_u\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#elif LV_HAVE_SSE && LV_HAVE_32 + +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform () * 32767; +} + + +void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { + const int vlen = 789743; + + volk_environment_init(); + int ret; + + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result; + + ret = posix_memalign((void**)&input, 16, vlen << 3); + ret = posix_memalign((void**)&taps, 16, vlen << 3); + ret = posix_memalign((void**)&result_generic, 16, 8); + ret = posix_memalign((void**)&result, 16, 8); + + + result_generic[0] = std::complex(0,0); + result[0] = std::complex(0,0); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, 
vlen * 2); + + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic"); + + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32"); + + printf("32fc_x2_conjugate_dot_prod_32fc_u\n"); + printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); + + assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); + + free(input); + free(taps); + free(result_generic); + free(result); + +} + + +#else + +void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { + printf("sse not available... no test performed\n"); +} + +#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h new file mode 100644 index 000000000..f07402403 --- /dev/null +++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H +#define INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H + +#include +#include + +class qa_32fc_x2_conjugate_dot_prod_32fc_u : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_x2_conjugate_dot_prod_32fc_u); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H */ diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc deleted file mode 100644 index 98d3e9728..000000000 --- a/volk/lib/qa_volk.cc +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -/* - * This class gathers together all the test cases for the example - * directory into a single test suite. As you create new test cases, - * add them here. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -CppUnit::TestSuite * -qa_volk::suite() -{ - CppUnit::TestSuite *s = new CppUnit::TestSuite("volk"); - - s->addTest(qa_16s_quad_max_star_aligned16::suite()); - s->addTest(qa_32fc_dot_prod_aligned16::suite()); - s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite()); - s->addTest(qa_32fc_square_dist_aligned16::suite()); - s->addTest(qa_32f_sum_of_poly_aligned16::suite()); - s->addTest(qa_32fc_index_max_aligned16::suite()); - 
s->addTest(qa_32f_index_max_aligned16::suite()); - s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite()); - s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite()); - s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite()); - s->addTest(qa_16s_branch_4_state_8_aligned16::suite()); - s->addTest(qa_16s_max_star_horizontal_aligned16::suite()); - s->addTest(qa_16s_max_star_aligned16::suite()); - s->addTest(qa_16s_add_quad_aligned16::suite()); - s->addTest(qa_32f_add_aligned16::suite()); - s->addTest(qa_32f_subtract_aligned16::suite()); - s->addTest(qa_32f_max_aligned16::suite()); - s->addTest(qa_32f_min_aligned16::suite()); - s->addTest(qa_64f_max_aligned16::suite()); - s->addTest(qa_64f_min_aligned16::suite()); - s->addTest(qa_32s_and_aligned16::suite()); - s->addTest(qa_32s_or_aligned16::suite()); - s->addTest(qa_32f_dot_prod_aligned16::suite()); - s->addTest(qa_32f_dot_prod_unaligned16::suite()); - s->addTest(qa_32f_fm_detect_aligned16::suite()); - s->addTest(qa_32fc_32f_multiply_aligned16::suite()); - s->addTest(qa_32fc_multiply_aligned16::suite()); - s->addTest(qa_32f_divide_aligned16::suite()); - s->addTest(qa_32f_multiply_aligned16::suite()); - s->addTest(qa_32f_sqrt_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite()); - s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite()); - s->addTest(qa_32u_popcnt_aligned16::suite()); - s->addTest(qa_64u_popcnt_aligned16::suite()); - s->addTest(qa_16u_byteswap_aligned16::suite()); - s->addTest(qa_32u_byteswap_aligned16::suite()); - s->addTest(qa_64u_byteswap_aligned16::suite()); - s->addTest(qa_32f_normalize_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite()); - 
s->addTest(qa_16sc_magnitude_16s_aligned16::suite()); - s->addTest(qa_16sc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_64f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite()); - s->addTest(qa_32fc_magnitude_16s_aligned16::suite()); - s->addTest(qa_32fc_magnitude_32f_aligned16::suite()); - s->addTest(qa_32f_interleave_16sc_aligned16::suite()); - s->addTest(qa_32f_interleave_32fc_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite()); - s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite()); - s->addTest(qa_16s_convert_32f_aligned16::suite()); - s->addTest(qa_16s_convert_32f_unaligned16::suite()); - s->addTest(qa_16s_convert_8s_aligned16::suite()); - s->addTest(qa_16s_convert_8s_unaligned16::suite()); - s->addTest(qa_32f_convert_16s_aligned16::suite()); - s->addTest(qa_32f_convert_16s_unaligned16::suite()); - s->addTest(qa_32f_convert_32s_aligned16::suite()); - s->addTest(qa_32f_convert_32s_unaligned16::suite()); - s->addTest(qa_32f_convert_64f_aligned16::suite()); - s->addTest(qa_32f_convert_64f_unaligned16::suite()); - s->addTest(qa_32f_convert_8s_aligned16::suite()); - s->addTest(qa_32f_convert_8s_unaligned16::suite()); - s->addTest(qa_32s_convert_32f_aligned16::suite()); - s->addTest(qa_32s_convert_32f_unaligned16::suite()); - s->addTest(qa_64f_convert_32f_aligned16::suite()); - s->addTest(qa_64f_convert_32f_unaligned16::suite()); - s->addTest(qa_8s_convert_16s_aligned16::suite()); - s->addTest(qa_8s_convert_16s_unaligned16::suite()); - s->addTest(qa_8s_convert_32f_aligned16::suite()); - 
s->addTest(qa_8s_convert_32f_unaligned16::suite()); - s->addTest(qa_32fc_32f_power_32fc_aligned16::suite()); - s->addTest(qa_32f_power_aligned16::suite()); - s->addTest(qa_32fc_atan2_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite()); - s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite()); - s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite()); - s->addTest(qa_32f_accumulator_aligned16::suite()); - s->addTest(qa_32f_stddev_aligned16::suite()); - s->addTest(qa_32f_stddev_and_mean_aligned16::suite()); - - return s; -} diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index f33670856..779bc61eb 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); -- cgit