From bef3db60e73953f2d2ecdc6a86a81e11df3b103d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Mon, 13 Dec 2010 19:18:45 -0800
Subject: volk: committed some stuff I neglected

---
 volk/lib/Makefile.am              | 17 +++++++++++------
 volk/lib/qa_32f_sqrt_aligned16.cc | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 814d438fd..1291b01cd 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -45,7 +45,9 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 
 # list of programs run by "make check" and "make distcheck"
 #TESTS = test_all
-
+#orc stuff gets built in the ORC directory, conditional on ORC being enabled.
+#it gets linked in during the build of libvolk as an added library.
+#there might be a better way to do this.
 
 lib_LTLIBRARIES = \
 	libvolk.la \
@@ -72,6 +74,9 @@ universal_CODE = 		\
 
 generic_CODE = 		\
 	volk_cpu_generic.c
+	
+orc_CODE =      \
+	volk_cpu_orc.c
 
 x86_CODE = 		\
 	volk_cpu_x86.c
@@ -133,10 +138,9 @@ endif
 
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-
-libvolk_la_LIBADD =
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
+libvolk_la_LIBADD = ../orc/libvolk_orc.a
 
 
@@ -233,11 +237,12 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_stddev_aligned16.cc \
 	qa_32f_stddev_and_mean_aligned16.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
 
 libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
+	../orc/libvolk_orc.a \
 	$(CPPUNIT_LIBS)
 
 # ----------------------------------------------------------------
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
index 9a5f71de0..81d66dad7 100644
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ b/volk/lib/qa_32f_sqrt_aligned16.cc
@@ -52,6 +52,14 @@ void qa_32f_sqrt_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  
   /*
   for(int i = 0; i < 10; ++i) {
     printf("inputs: %f\n", input0[i]);
@@ -92,6 +100,13 @@ void qa_32f_sqrt_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
   }
-- 
cgit 


From 611526f9dfba0df4a1a49d47916706438ac194b3 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 01:00:29 -0800
Subject: Volk: Automated more automake for orc. Brought orcc generation in.
 Shared library libvolk_orc.la. Linking is hackery right now with specified
 -lorc-0.4 flags; this should change. Otherwise pretty much OK.

---
 volk/lib/Makefile.am | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 1291b01cd..649d461e0 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -138,9 +138,9 @@ endif
 
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
-libvolk_la_LIBADD = ../orc/libvolk_orc.a
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_la_LIBADD = ../orc/libvolk_orc.la
 
 
@@ -237,12 +237,12 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_stddev_aligned16.cc \
 	qa_32f_stddev_and_mean_aligned16.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
 
 libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
-	../orc/libvolk_orc.a \
+	../orc/libvolk_orc.la \
 	$(CPPUNIT_LIBS)
 
 # ----------------------------------------------------------------
-- 
cgit 


From 05f4bced29987a0a573d1fc5b214f3fa01dc84bd Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 13:36:55 -0800
Subject: Volk: More autotools stuff for Orc. Should build OK with or without
 Orc now.

---
 volk/lib/Makefile.am | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 649d461e0..385401ae1 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -138,10 +138,13 @@ endif
 
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+if HAVE_ORC
 libvolk_la_LIBADD = ../orc/libvolk_orc.la
-
+libvolk_la_LDFLAGS += -lorc-0.4
+libvolk_runtime_la_LDFLAGS += -lorc-0.4
+endif
 
 
 # ----------------------------------------------------------------
@@ -237,13 +240,18 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_stddev_aligned16.cc \
 	qa_32f_stddev_and_mean_aligned16.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
 
 libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
-	../orc/libvolk_orc.la \
 	$(CPPUNIT_LIBS)
+	
+if HAVE_ORC
+libvolk_qa_la_LIBADD += \
+    ../orc/libvolk_orc.la
+    libvolk_qa_la_LDFLAGS += -lorc-0.4
+endif
 
 # ----------------------------------------------------------------
 # headers that don't get installed
-- 
cgit 


From d8031649fa3186d7e6b000dcfaa349deacf51262 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 16:41:14 -0800
Subject: Volk: patch via Nick M.

---
 volk/lib/Makefile.am | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 385401ae1..d38004f2a 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -74,9 +74,6 @@ universal_CODE = 		\
 
 generic_CODE = 		\
 	volk_cpu_generic.c
-	
-orc_CODE =      \
-	volk_cpu_orc.c
 
 x86_CODE = 		\
 	volk_cpu_x86.c
@@ -356,7 +353,7 @@ noinst_PROGRAMS = \
 	test_all
 
 test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la
+test_all_LDADD   = libvolk_qa.la ../orc/libvolk_orc.la
 
 
 distclean-local: 
-- 
cgit 


From 2e9a7d350713b4e1b21458db8f3fce8a557858ae Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 17:13:40 -0800
Subject: Volk: Added QA tests for all the Orc stuff. Added a 16u_byteswap but
 it's broken right now.

---
 volk/lib/qa_16u_byteswap_aligned16.cc   | 9 +++++++++
 volk/lib/qa_32f_add_aligned16.cc        | 9 +++++++++
 volk/lib/qa_32s_and_aligned16.cc        | 9 +++++++++
 volk/lib/qa_8s_convert_32f_aligned16.cc | 8 ++++++++
 4 files changed, 35 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
index 6b19828a4..c30b6ba41 100644
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16u_byteswap_aligned16::t1() {
   
   uint16_t output0[vlen] __attribute__ ((aligned (16)));
   uint16_t output01[vlen] __attribute__ ((aligned (16)));
+  uint16_t output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_16u_byteswap_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16u_byteswap_aligned16_manual(output02, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
   }
@@ -54,6 +62,7 @@ void qa_16u_byteswap_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);    
   }
 }
 
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
index 002aebfc9..d9214e8a2 100644
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ b/volk/lib/qa_32f_add_aligned16.cc
@@ -78,6 +78,7 @@ void qa_32f_add_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -93,6 +94,13 @@ void qa_32f_add_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -107,6 +115,7 @@ void qa_32f_add_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
index 72d05cf6f..5720ee869 100644
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ b/volk/lib/qa_32s_and_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32s_and_aligned16::t1() {
   
   int32_t output0[vlen] __attribute__ ((aligned (16)));
   int32_t output01[vlen] __attribute__ ((aligned (16)));
+  int32_t output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -40,6 +41,13 @@ void qa_32s_and_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32s_and_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
index 522da0b9d..3b3aa6919 100644
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ b/volk/lib/qa_8s_convert_32f_aligned16.cc
@@ -40,6 +40,14 @@ void qa_8s_convert_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
-- 
cgit 


From 87a9b14e0b0e2c2d0dcd75d42f2a15211265f102 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 17:44:34 -0800
Subject: Volk: added references to libs instead of specifying them directly

---
 volk/lib/Makefile.am | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index d38004f2a..faab4a010 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -133,14 +133,21 @@ libvolk_runtime_la_SOURCES =	\
 	$(universal_runtime_CODE)
 endif
 
+volk_orc_LDFLAGS = \
+	$(ORC_LDFLAGS) \
+	-lorc-0.4
+	
+volk_orc_LIBADD = \
+	../orc/libvolk_orc.la
 
-
+if HAVE_ORC
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_la_LIBADD = $(volk_orc_LIBADD)
+else
 libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-if HAVE_ORC
-libvolk_la_LIBADD = ../orc/libvolk_orc.la
-libvolk_la_LDFLAGS += -lorc-0.4
-libvolk_runtime_la_LDFLAGS += -lorc-0.4
+libvolk_la_LIBADD =
 endif
 
 
@@ -243,12 +250,6 @@ libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
 	$(CPPUNIT_LIBS)
-	
-if HAVE_ORC
-libvolk_qa_la_LIBADD += \
-    ../orc/libvolk_orc.la
-    libvolk_qa_la_LDFLAGS += -lorc-0.4
-endif
 
 # ----------------------------------------------------------------
 # headers that don't get installed
@@ -353,7 +354,7 @@ noinst_PROGRAMS = \
 	test_all
 
 test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la ../orc/libvolk_orc.la
+test_all_LDADD   = libvolk_qa.la
 
 
 distclean-local: 
-- 
cgit 


From 21426265324c883c91eeaaf75a81f2ccdc6e249d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 21:12:49 -0800
Subject: Volk: Build fixes to work with/without Orc.

---
 volk/lib/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index faab4a010..253033461 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -140,7 +140,7 @@ volk_orc_LDFLAGS = \
 volk_orc_LIBADD = \
 	../orc/libvolk_orc.la
 
-if HAVE_ORC
+if LV_HAVE_ORC
 libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
 libvolk_la_LIBADD = $(volk_orc_LIBADD)
-- 
cgit 


From f9ee6a55cb397f9302769a25a8c959fa162354f0 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 22:58:33 -0800
Subject: Volk: Some new basic Orc implementations with QA code

---
 volk/lib/qa_16u_byteswap_aligned16.cc |  1 +
 volk/lib/qa_32f_divide_aligned16.cc   | 10 ++++++++++
 volk/lib/qa_32f_multiply_aligned16.cc |  9 +++++++++
 volk/lib/qa_32f_subtract_aligned16.cc |  9 +++++++++
 4 files changed, 29 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
index c30b6ba41..b740f91df 100644
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -30,6 +30,7 @@ void qa_16u_byteswap_aligned16::t1() {
     output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
   }
   memcpy(output01, output0, vlen*sizeof(uint16_t));
+  memcpy(output02, output0, vlen*sizeof(uint16_t));
 
   printf("16u_byteswap_aligned\n");
 
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index 8826bf94f..f104e0443 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -35,6 +35,7 @@ void qa_32f_divide_aligned16::t1() {
   float input1[vlen] __attribute__ ((aligned (16)));
   
   float output0[vlen] __attribute__ ((aligned (16)));
+  float output1[vlen] __attribute__ ((aligned (16)));
   float output_known[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
@@ -51,6 +52,14 @@ void qa_32f_divide_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   /*
   for(int i = 0; i < 10; ++i) {
@@ -61,6 +70,7 @@ void qa_32f_divide_aligned16::t1() {
   
   for(int i = 0; i < vlen; ++i) {
     CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
+    CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]);
   }
 }
 
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
index e52748466..f9c034d70 100644
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32f_multiply_aligned16.cc
@@ -78,6 +78,7 @@ void qa_32f_multiply_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -93,6 +94,13 @@ void qa_32f_multiply_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -107,6 +115,7 @@ void qa_32f_multiply_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
index a7e1b5ae3..5a5a7c9b6 100644
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ b/volk/lib/qa_32f_subtract_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_subtract_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_32f_subtract_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32f_subtract_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
-- 
cgit 


From 15ad4b5398e474bfb52fdb7e826b69f3e398c0b0 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 15 Dec 2010 16:27:42 -0800
Subject: Volk: A bunch of new ORC routines plus tests. Also fixed a typo in
 the generic version of 16sc_magnitude_16s_a16.

---
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc |  9 +++++++++
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 20 ++++++++++++++++++++
 volk/lib/qa_32f_divide_aligned16.cc         |  9 +++++++++
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc |  9 +++++++++
 volk/lib/qa_32fc_magnitude_32f_aligned16.cc |  9 +++++++++
 volk/lib/qa_32s_or_aligned16.cc             |  9 +++++++++
 6 files changed, 65 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index b14610757..c8f13ff84 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -23,6 +23,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
   
   int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -40,6 +41,13 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
   }
@@ -64,6 +72,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index 2c9e48f6e..e7178863c 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -15,6 +15,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
   
   float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
   float output_known[vlen] __attribute__ ((aligned (16)));
 
   int16_t* inputLoad = (int16_t*)input0;
@@ -37,6 +38,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   /*
   for(int i = 0; i < 100; ++i) {
@@ -48,6 +57,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4);
   }
 }
 
@@ -63,6 +73,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
   
   float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
   float output_sse[vlen] __attribute__ ((aligned (16)));
   float output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -79,6 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -104,6 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index f104e0443..b2c2ecf9a 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -88,6 +88,7 @@ void qa_32f_divide_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -103,6 +104,13 @@ void qa_32f_divide_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -117,6 +125,7 @@ void qa_32f_divide_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index a4be1616b..c3e65866b 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -23,6 +23,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
   
   int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -40,6 +41,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
   }
@@ -64,6 +72,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
index d69ada408..6a1d46c7a 100644
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
@@ -23,6 +23,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
   std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
   
   float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
   float output_sse[vlen] __attribute__ ((aligned (16)));
   float output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -40,6 +41,13 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
   }
@@ -64,6 +72,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
index e09dfb91c..9ea5283a6 100644
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ b/volk/lib/qa_32s_or_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32s_or_aligned16::t1() {
   
   int32_t output0[vlen] __attribute__ ((aligned (16)));
   int32_t output01[vlen] __attribute__ ((aligned (16)));
+  int32_t output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -40,6 +41,13 @@ void qa_32s_or_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32s_or_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
-- 
cgit 


From c6fff77de9b686761f93f0e1de237f8543f5e919 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 17 Dec 2010 11:14:41 -0800
Subject: Volk: A bunch of new Orc routines plus a couple of build changes.
 32fc_magnitude_16s fails test_all right now.

---
 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc     | 12 ++++++++++++
 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc     | 11 +++++++++++
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc |  9 +++++++++
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc        |  5 +++--
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc        |  6 +++---
 volk/lib/qa_32f_max_aligned16.cc                   |  9 +++++++++
 volk/lib/qa_32f_min_aligned16.cc                   |  9 +++++++++
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc        |  8 ++++----
 volk/lib/qa_volk.cc                                |  1 -
 9 files changed, 60 insertions(+), 10 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
index e700ac72c..7e9e31df5 100644
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
   int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
   int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
   int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
 
@@ -43,6 +45,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
   }
@@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
 
     CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_ssse3[i]);
     CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_ssse31[i]);
+    
+    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_orc[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_orc1[i]);
   }
 }
 
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
index 6ee076998..45100206d 100644
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
   float output_generic1[vlen] __attribute__ ((aligned (16)));
   float output_sse2[vlen] __attribute__ ((aligned (16)));
   float output_sse21[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
+  float output_orc1[vlen] __attribute__ ((aligned (16)));
 
   int16_t* loadInput = (int16_t*)input0;
   for(int i = 0; i < vlen*2; ++i) {   
@@ -41,6 +43,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
   }
@@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_sse21[i], fabs(output_generic1[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_orc1[i], fabs(output_generic1[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
index 5ab458bc9..d187d20c3 100644
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
   
   int8_t output_generic[vlen] __attribute__ ((aligned (16)));
   int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+  int8_t output_orc[vlen] __attribute__ ((aligned (16)));
 
   int16_t* loadInput = (int16_t*)input0;
   for(int i = 0; i < vlen*2; ++i) {   
@@ -39,6 +40,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
   }
@@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
   }
 }
 
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index c8f13ff84..dd4ae75ff 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
-  start = clock();
+/*  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("orc_time: %f\n", total);
+*/
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+    //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index e7178863c..53d42e28c 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
-  start = clock();
+/*  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("orc_time: %f\n", total);
-
+*/
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+//    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
index 3ef375176..cb1fd3627 100644
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_32f_max_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
index 617e18b24..bf453f360 100644
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_32f_min_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index c3e65866b..105d32d0c 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
 
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
+  //for(int i = 0; i < 10; ++i) {
+  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+  //}
   
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b..f6a334da7 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -118,7 +118,6 @@ CppUnit::TestSuite *
 qa_volk::suite()
 {
   CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
   s->addTest(qa_16s_quad_max_star_aligned16::suite());
   s->addTest(qa_32fc_dot_prod_aligned16::suite());
   s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-- 
cgit 


From 200720da362e30f74083aad4dc106e4a057638bf Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 17 Dec 2010 12:20:16 -0800
Subject: Volk: Magnitude functions. 32fc_magnitude_16s currently clips to +MAX
 instead of -MAX.

---
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 6 +++---
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index dd4ae75ff..d00315b57 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,14 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
-/*  start = clock();
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("orc_time: %f\n", total);
-*/
+
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -73,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index 105d32d0c..53b3bf790 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
 
-  //for(int i = 0; i < 10; ++i) {
-  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
-  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
-  //}
+  for(int i = 0; i < 10; ++i) {
+    printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+    printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+  }
   
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
-- 
cgit 


From 0e92b93f21fc9c324c379bc318120d414e7422cc Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 17 Dec 2010 13:35:40 -0800
Subject: Volk: Orc impl for 32fc_magnitude_16s saturates at -max instead of
 +max.

---
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 6 +++---
 volk/lib/qa_volk.cc                         | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index 53b3bf790..93d4ec150 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,9 +63,9 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
 
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
-    printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+  for(int i = 0; i < 1; ++i) {
+  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
   }
   
   for(int i = 0; i < vlen; ++i) {
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index f6a334da7..c3c27b69b 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -118,6 +118,7 @@ CppUnit::TestSuite *
 qa_volk::suite()
 {
   CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
+
   s->addTest(qa_16s_quad_max_star_aligned16::suite());
   s->addTest(qa_32fc_dot_prod_aligned16::suite());
   s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-- 
cgit 


From 5b45b875ed58fd66234764a05da42c6eaff22c4d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 11 Jan 2011 15:17:55 -0800
Subject: Volk: Added more Orc routines (including complex multiply). Started
 redoing the testing framework so it's easier to add new archs to tests.

---
 volk/lib/Makefile.am                       |  2 +
 volk/lib/qa_32f_normalize_aligned16.cc     | 13 +++++
 volk/lib/qa_32fc_32f_multiply_aligned16.cc | 84 +++++++++++++-----------------
 volk/lib/qa_32fc_multiply_aligned16.cc     | 12 +++++
 4 files changed, 64 insertions(+), 47 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 253033461..0aeafe4aa 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -156,6 +156,7 @@ endif
 # ----------------------------------------------------------------
 libvolk_qa_la_SOURCES = \
 	qa_volk.cc \
+	qa_utils.cc \
 	qa_16s_quad_max_star_aligned16.cc \
 	qa_32fc_dot_prod_aligned16.cc \
 	qa_32fc_square_dist_aligned16.cc \
@@ -257,6 +258,7 @@ libvolk_qa_la_LIBADD = \
 noinst_HEADERS = \
 	volk_init.h \
 	qa_volk.h \
+	qa_utils.h \
 	assembly.h \
 	qa_16s_quad_max_star_aligned16.h \
 	qa_32fc_dot_prod_aligned16.h \
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
index 1c7b485a6..0da43ecff 100644
--- a/volk/lib/qa_32f_normalize_aligned16.cc
+++ b/volk/lib/qa_32f_normalize_aligned16.cc
@@ -26,13 +26,16 @@ void qa_32f_normalize_aligned16::t1() {
 
   float* output0;
   float* output01;
+  float* output02;
   ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float));
   ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float));
+  ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float));
 
   for(int i = 0; i < vlen; ++i) {   
     output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
   }
   memcpy(output01, output0, vlen*sizeof(float));
+  memcpy(output02, output0, vlen*sizeof(float));
   printf("32f_normalize_aligned\n");
 
   start = clock();
@@ -49,6 +52,14 @@ void qa_32f_normalize_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  
   for(int i = 0; i < 1; ++i) {
     //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
     //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
@@ -57,10 +68,12 @@ void qa_32f_normalize_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     // printf("%e...%e\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4);
   }
 
   free(output0);
   free(output01);
+  free(output02);
 }
 
 #endif
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
index 4eba0a3cd..7bb8d21c1 100644
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
@@ -2,28 +2,12 @@
 #include <volk/volk.h>
 #include <qa_32fc_32f_multiply_aligned16.h>
 #include <stdlib.h>
-#include <math.h>
 #include <time.h>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+#include <string.h>
+#include <qa_utils.h>
 
 #define	ERR_DELTA	(1e-4)
 
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE3
 void qa_32fc_32f_multiply_aligned16::t1() {
 
   const int vlen = 2046;
@@ -36,50 +20,56 @@ void qa_32fc_32f_multiply_aligned16::t1() {
   std::complex<float>* input;
   float * taps;
   int i;
+  std::vector<std::string> archs;
+  archs.push_back("generic");
+#ifdef LV_HAVE_SSE3
+  archs.push_back("sse3");
+#endif
+#ifdef LV_HAVE_ORC
+  archs.push_back("orc");
+#endif
   
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
+  std::vector<std::complex<float>* > results;
 
   ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
   ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float));
+  
+  for(i=0; i < archs.size(); i++) {
+      std::complex<float> *ptr;
+      ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float));
+      if(ret) {
+          printf("Couldn't allocate memory\n");
+          exit(1);
+      }
+      results.push_back(ptr);
+  }
 
   random_floats((float*)input, vlen * 2);
   random_floats(taps, vlen);
   
   printf("32fc_32f_multiply_aligned16\n");
 
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
+  for(i=0; i < archs.size(); i++) {
+    start = clock();
+    for(int count = 0; count < ITERS; ++count) {
+      volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str());
+    }
+    end = clock();
+    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+    printf("%s_time: %f\n", archs[i].c_str(), total);
   }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
 
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
+  for(i=0; i < vlen; i++) {
+      int j = 1;
+      for(j; j < archs.size(); j++) {
+          assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA);
+      }
   }
 
   free(input);
   free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-#else
-void qa_32fc_32f_multiply_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
+  for(i=0; i < archs.size(); i++) {      
+    free(results[i]);
+  }
 }
 
-#endif /* LV_HAVE_SSE3 */
-
diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc
index e1f7eab3d..022b58ad6 100644
--- a/volk/lib/qa_32fc_multiply_aligned16.cc
+++ b/volk/lib/qa_32fc_multiply_aligned16.cc
@@ -41,11 +41,13 @@ void qa_32fc_multiply_aligned16::t1() {
   
   std::complex<float>* result_generic;
   std::complex<float>* result_sse3;
+  std::complex<float>* result_orc;
 
   ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float));
   ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float));
   ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
   ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float));
+  ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float));
   
   random_floats((float*)input, vlen * 2);
   random_floats((float*)taps, vlen * 2);
@@ -67,15 +69,25 @@ void qa_32fc_multiply_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   for(i = 0; i < vlen; i++){
     assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
+    assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA);
   }
 
   free(input);
   free(taps);
   free(result_generic);
   free(result_sse3);
+  free(result_orc);
   
 }
 #else
-- 
cgit 


From c77bb3e71562daa68e9a195a0131b7cc04324784 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 12 Jan 2011 19:20:35 -0800
Subject: Volk: Working on a new QA architecture that doesn't require
 individual test programs.

---
 volk/lib/Makefile.am                          |   2 -
 volk/lib/qa_32fc_32f_multiply_aligned16.cc    |   6 +-
 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc |   2 +-
 volk/lib/qa_utils.cc                          | 223 ++++++++++++++++++++++++++
 volk/lib/qa_utils.h                           |  19 +++
 volk/lib/qa_volk.cc                           |   2 +-
 6 files changed, 247 insertions(+), 7 deletions(-)
 create mode 100644 volk/lib/qa_utils.cc
 create mode 100644 volk/lib/qa_utils.h

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 0aeafe4aa..a10b0a362 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -156,7 +156,6 @@ endif
 # ----------------------------------------------------------------
 libvolk_qa_la_SOURCES = \
 	qa_volk.cc \
-	qa_utils.cc \
 	qa_16s_quad_max_star_aligned16.cc \
 	qa_32fc_dot_prod_aligned16.cc \
 	qa_32fc_square_dist_aligned16.cc \
@@ -181,7 +180,6 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_dot_prod_aligned16.cc \
 	qa_32f_dot_prod_unaligned16.cc \
 	qa_32f_fm_detect_aligned16.cc \
-	qa_32fc_32f_multiply_aligned16.cc \
 	qa_32fc_multiply_aligned16.cc \
 	qa_32f_divide_aligned16.cc \
 	qa_32f_multiply_aligned16.cc \
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
index 7bb8d21c1..b80e0e008 100644
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
@@ -5,10 +5,11 @@
 #include <time.h>
 #include <string.h>
 #include <qa_utils.h>
+#include <boost/test/unit_test.hpp>
 
-#define	ERR_DELTA	(1e-4)
+#define	TOLERANCE	(1e-4)
 
-void qa_32fc_32f_multiply_aligned16::t1() {
+void qa_32fc_32f_multiply_aligned16(void) {
 
   const int vlen = 2046;
   const int ITERS = 100000;
@@ -72,4 +73,3 @@ void qa_32fc_32f_multiply_aligned16::t1() {
     free(results[i]);
   }
 }
-
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
index 94e63e37d..f753e1107 100644
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
@@ -40,7 +40,7 @@ void qa_8sc_deinterleave_16s_aligned16::t1() {
 
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
+    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
new file mode 100644
index 000000000..4d93ca62a
--- /dev/null
+++ b/volk/lib/qa_utils.cc
@@ -0,0 +1,223 @@
+#include "qa_utils.h"
+#include <stdlib.h>
+#include <boost/foreach.hpp>
+#include <boost/assign/list_of.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/test/unit_test.hpp>
+#include <iostream>
+#include <vector>
+#include <time.h>
+//#include <math.h>
+//#include <volk/volk_runtime.h>
+#include <volk/volk_registry.h>
+#include <volk/volk.h>
+#include <boost/typeof/typeof.hpp>
+#include <boost/type_traits.hpp>
+//#include <boost/test/unit_test.hpp>
+
+float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform ();
+}
+
+template <class t>
+t *make_aligned_buffer(unsigned int len) {
+  t *buf;
+  int ret;
+  ret = posix_memalign((void**)&buf, 16, len * sizeof(t));
+  assert(ret == 0);
+  return buf;
+}
+
+void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) {
+    BOOST_FOREACH(std::string sig, inputsig) {
+        if     (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer<lv_32fc_t>(vlen));
+        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<float>(vlen));
+        else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer<int16_t>(vlen));
+        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<int8_t>(vlen));
+        else std::cout << "Invalid type!" << std::endl;
+    }
+}
+
+static std::vector<std::string> get_arch_list(const int archs[]) {
+    std::vector<std::string> archlist;
+    int num_archs = archs[0];
+    
+    //there has got to be a way to query these arches
+    for(int i = 0; i < num_archs; i++) {
+        switch(archs[i+1]) {
+        case (1<<LV_GENERIC):
+            archlist.push_back("generic");
+            break;
+        case (1<<LV_ORC):
+            archlist.push_back("orc");
+            break;
+        case (1<<LV_SSE):
+            archlist.push_back("sse");
+            break;
+        case (1<<LV_SSE2):
+            archlist.push_back("sse2");
+            break;
+        case (1<<LV_SSSE3):
+            archlist.push_back("ssse3");
+            break;
+        case (1<<LV_SSE4_1):
+            archlist.push_back("sse4_1");
+            break;
+        case (1<<LV_SSE4_2):
+            archlist.push_back("sse4_2");
+            break;
+        case (1<<LV_SSE4_A):
+            archlist.push_back("sse4_a");
+            break;
+        case (1<<LV_MMX):
+            archlist.push_back("mmx");
+            break;
+        case (1<<LV_AVX):
+            archlist.push_back("avx");
+            break;
+        default:
+            break;
+        }
+    }
+    return archlist;
+}
+
+static bool is_valid_type(std::string type) {
+    std::vector<std::string> valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u");
+    
+    BOOST_FOREACH(std::string this_type, valid_types) {
+        if(type == this_type) return true;
+    }
+    return false;
+}
+    
+
+static void get_function_signature(std::vector<std::string> &inputsig, 
+                                   std::vector<std::string> &outputsig, 
+                                   std::string name) {
+    boost::char_separator<char> sep("_");
+    boost::tokenizer<boost::char_separator<char> > tok(name, sep);
+    std::vector<std::string> toked;
+    tok.assign(name);
+    toked.assign(tok.begin(), tok.end());
+    
+    assert(toked[0] == "volk");
+    
+    inputsig.push_back(toked[1]); //mandatory
+    int pos = 2;
+    bool valid_type = true;
+    while(valid_type && pos < toked.size()) {
+        if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]);
+        else valid_type = false;
+        pos++;
+    }
+    while(!valid_type && pos < toked.size()) {
+        if(is_valid_type(toked[pos])) valid_type = true;
+        pos++;
+    }
+    while(valid_type && pos < toked.size()) {
+        if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]);
+        else valid_type = false;
+        pos++;
+    }
+        
+    //if there's no explicit output sig then assume the output is the same as the first input
+    if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
+    assert(inputsig.size() != 0);
+    assert(outputsig.size() != 0);
+}
+
+inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str());
+}
+
+inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str());
+}
+
+inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str());
+}
+
+bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
+    std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
+    
+    //first let's get a list of available architectures for the test
+    std::vector<std::string> arch_list = get_arch_list(archs);
+    
+    BOOST_FOREACH(std::string arch, arch_list) {
+        std::cout << "Found an arch: " << arch << std::endl;
+    }
+    
+    //now we have to get a function signature by parsing the name
+    std::vector<std::string> inputsig, outputsig;
+    get_function_signature(inputsig, outputsig, name);
+
+    for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl;
+    for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl;
+    
+    //now that we have that, we'll set up input and output buffers based on the function signature
+    std::vector<void *> inbuffs;
+    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    
+    //and set the input buffers to something random
+    //TODO
+    
+    //allocate output buffers -- one for each output for each arch
+    std::vector<void *> outbuffs;
+    BOOST_FOREACH(std::string arch, arch_list) {
+        make_buffer_for_signature(outbuffs, outputsig, vlen);
+    }
+    
+    //now run the test
+    clock_t start, end;
+    for(int i = 0; i < arch_list.size(); i++) {
+        start = clock();
+        switch(outputsig.size()+inputsig.size()) {
+            case 2:
+                run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                break;
+            case 3:
+                run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                break;
+            case 4:
+                run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                break;
+            default:
+                break;
+        }
+        end = clock();
+        std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
+    }
+
+    //and now compare each output to the generic output
+    //first we have to know which output is the generic one, they aren't in order...
+    int generic_offset;
+    for(int i=0; i<arch_list.size(); i++) 
+        if(arch_list[i] == "generic") generic_offset=i;
+    
+    for(int i=0; i<arch_list.size(); i++) {
+        if(arch_list[i] != "generic") {
+            for(int j=0; i<vlen; j++) {
+                BOOST_CHECK_CLOSE(((float *)(outbuffs[generic_offset]))[j], ((float *)(outbuffs[i]))[j], tol);
+            }
+        }
+    }
+
+    BOOST_FOREACH(void *buf, inbuffs) {
+        free(buf);
+    }
+    BOOST_FOREACH(void *buf, outbuffs) {
+        free(buf);
+    }
+    return 0;
+}
+
+
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
new file mode 100644
index 000000000..80323c445
--- /dev/null
+++ b/volk/lib/qa_utils.h
@@ -0,0 +1,19 @@
+#ifndef VOLK_QA_UTILS_H
+#define VOLK_QA_UTILS_H
+
+#include <stdlib.h>
+#include <string>
+#include <volk/volk.h>
+
+float uniform(void);
+void random_floats(float *buf, unsigned n);
+
+bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
+
+#define VOLK_RUN_TESTS(func, tol, len, iter) run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter)
+
+typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
+typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
+typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
+
+#endif //VOLK_QA_UTILS_H
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b..8e7e59768 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -143,7 +143,7 @@ qa_volk::suite()
   s->addTest(qa_32f_dot_prod_aligned16::suite());
   s->addTest(qa_32f_dot_prod_unaligned16::suite());
   s->addTest(qa_32f_fm_detect_aligned16::suite());
-  s->addTest(qa_32fc_32f_multiply_aligned16::suite());
+  //s->addTest(qa_32fc_32f_multiply_aligned16::suite());
   s->addTest(qa_32fc_multiply_aligned16::suite());
   s->addTest(qa_32f_divide_aligned16::suite());
   s->addTest(qa_32f_multiply_aligned16::suite());
-- 
cgit 


From 9a527257014878cac993ffe854bf8fdacc412be6 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 14 Jan 2011 13:07:06 -0800
Subject: Volk: QA code fixes, more Orc routines. Broke the 32fc_multiply Orc
 impl because I'm lame and lost some work. Fixed volk_8s_convert_16s Orc impl.
 Still need to rename functions and modify the QA sig parser to match. Then
 rewrite makefiles.

---
 volk/lib/qa_utils.cc | 94 ++++++++++++++++++++++++++++++++++++++++++----------
 volk/lib/qa_utils.h  |  2 +-
 2 files changed, 77 insertions(+), 19 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 4d93ca62a..fa21db487 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -3,7 +3,7 @@
 #include <boost/foreach.hpp>
 #include <boost/assign/list_of.hpp>
 #include <boost/tokenizer.hpp>
-#include <boost/test/unit_test.hpp>
+//#include <boost/test/unit_test.hpp>
 #include <iostream>
 #include <vector>
 #include <time.h>
@@ -13,19 +13,39 @@
 #include <volk/volk.h>
 #include <boost/typeof/typeof.hpp>
 #include <boost/type_traits.hpp>
-//#include <boost/test/unit_test.hpp>
 
 float uniform() {
   return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
 }
 
-void
-random_floats (float *buf, unsigned n)
+void random_floats (float *buf, unsigned n)
 {
   for (unsigned i = 0; i < n; i++)
     buf[i] = uniform ();
 }
 
+void load_random_data(void *data, std::string sig, unsigned int n) {
+    if(sig == "32fc") {
+        random_floats((float *)data, n*2);
+    } else if(sig == "32f") {
+        random_floats((float *)data, n);
+    } else if(sig == "32u") {
+        for(int i=0; i<n; i++) ((uint32_t *)data)[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
+    } else if(sig == "32s") {
+        for(int i=0; i<n; i++) ((int32_t *)data)[i] = ((int32_t) (rand() - (RAND_MAX/2)));
+    } else if(sig == "16u") {
+        for(int i=0; i<n; i++) ((uint16_t *)data)[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
+    } else if(sig == "16s") {
+        for(int i=0; i<n; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+    } else if(sig == "16sc") {
+        for(int i=0; i<n*2; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+    } else if(sig == "8u") {
+        for(int i=0; i<n; i++) ((uint8_t *)data)[i] = ((uint8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 256.0));
+    } else if(sig == "8s") {
+        for(int i=0; i<n; i++) ((int8_t *)data)[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
+    } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl;
+}
+
 template <class t>
 t *make_aligned_buffer(unsigned int len) {
   t *buf;
@@ -37,11 +57,11 @@ t *make_aligned_buffer(unsigned int len) {
 
 void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) {
     BOOST_FOREACH(std::string sig, inputsig) {
-        if     (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer<lv_32fc_t>(vlen));
-        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<float>(vlen));
-        else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer<int16_t>(vlen));
-        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<int8_t>(vlen));
-        else std::cout << "Invalid type!" << std::endl;
+        if     (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer<uint64_t>(vlen));
+        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<uint32_t>(vlen));
+        else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer<uint16_t>(vlen));
+        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<uint8_t>(vlen));
+        else std::cout << "Invalid type: " << sig << std::endl;
     }
 }
 
@@ -90,7 +110,7 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
 }
 
 static bool is_valid_type(std::string type) {
-    std::vector<std::string> valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u");
+    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u");
     
     BOOST_FOREACH(std::string this_type, valid_types) {
         if(type == this_type) return true;
@@ -120,16 +140,23 @@ static void get_function_signature(std::vector<std::string> &inputsig,
     }
     while(!valid_type && pos < toked.size()) {
         if(is_valid_type(toked[pos])) valid_type = true;
-        pos++;
+        else pos++;
     }
     while(valid_type && pos < toked.size()) {
         if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]);
         else valid_type = false;
         pos++;
     }
-        
-    //if there's no explicit output sig then assume the output is the same as the first input
-    if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
+    
+    //if there's no output sig and only one input sig, assume there are 2 inputs
+    //this handles conversion fn's (which have a specified output sig) and most of the rest
+    if(outputsig.size() == 0 && inputsig.size() == 1) {
+        outputsig.push_back(inputsig[0]);
+        inputsig.push_back(inputsig[0]);
+    }//if there's no explicit output sig then assume the output is the same as the first input
+    else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
+    
+    
     assert(inputsig.size() != 0);
     assert(outputsig.size() != 0);
 }
@@ -168,7 +195,9 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     make_buffer_for_signature(inbuffs, inputsig, vlen);
     
     //and set the input buffers to something random
-    //TODO
+    for(int i=0; i<inputsig.size(); i++) {
+        load_random_data(inbuffs[i], inputsig[i], vlen);        
+    }
     
     //allocate output buffers -- one for each output for each arch
     std::vector<void *> outbuffs;
@@ -204,9 +233,38 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         if(arch_list[i] == "generic") generic_offset=i;
     
     for(int i=0; i<arch_list.size(); i++) {
-        if(arch_list[i] != "generic") {
-            for(int j=0; i<vlen; j++) {
-                BOOST_CHECK_CLOSE(((float *)(outbuffs[generic_offset]))[j], ((float *)(outbuffs[i]))[j], tol);
+        if(i != generic_offset) {
+            if(outputsig[0] == "32fc") {
+                for(int j=0; j<vlen*2; j++) {
+                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
+                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else if(outputsig[0] == "32f") {
+                for(int j=0; j<vlen; j++) {
+                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
+                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") {
+                for(int j=0; j<vlen; j++) {
+                    if(((uint32_t *)(outbuffs[generic_offset]))[j] != ((uint32_t *)(outbuffs[i]))[j]) {
+                        std::cout << "Generic: " << ((uint32_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint32_t *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else if(outputsig[0] == "16u" || outputsig[0] == "16s" || outputsig[0] == "8sc") {
+                for(int j=0; j<vlen; j++) {
+                    if(((uint16_t *)(outbuffs[generic_offset]))[j] != ((uint16_t *)(outbuffs[i]))[j]) {
+                        std::cout << "Generic: " << ((uint16_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint16_t *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else { 
+                std::cout << "Error: invalid type " << outputsig[0] << std::endl;
+                return 1;
             }
         }
     }
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 80323c445..f81d652fb 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -10,7 +10,7 @@ void random_floats(float *buf, unsigned n);
 
 bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
 
-#define VOLK_RUN_TESTS(func, tol, len, iter) run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter)
+#define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
 
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
-- 
cgit 


From 5c4aab18e4e5e34ce1f8e286bc534a02c1318932 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 14 Jan 2011 13:21:08 -0800
Subject: Volk: Makefile changes to use new test framework. Doesn't currently
 build libvolk_qa, as I haven't really determined an appropriate place for
 "leftover" tests that the standard framework won't handle.

---
 volk/lib/Makefile.am | 218 ++++++---------------------------------------------
 1 file changed, 24 insertions(+), 194 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index a10b0a362..5c995148a 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -24,18 +24,19 @@ include $(top_srcdir)/Makefile.common
 # of a hack. Figure out the right way to do this to find built
 # volk_config.h and volk_tables.h
 
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
 	-I$(top_builddir)/include \
 	$(LV_CXXFLAGS) $(WITH_INCLUDES)
 
 
-# We build 2 libraries and 1 executable here.  One library contains
-# everything except the libcppunit QA code, and one contains only the
-# libcppunit-based QA code.  The C++ QA code is especially recommended
+# We build 1 library and 1 executable here.  The library contains
+# everything except the QA code. The C++ QA code is especially recommended
 # when you have general purpose C or C++ code that may not get
 # thoroughly exercised by building and running a GR block.  The
 # executable runs the QA code at "make check" time.
 #
+#
+#
 # N.B., If there's a SWIG generated shared library and associated
 # python code, it will be contained in ../python, not here.  (That
 # code is conditionally built depending on the state of the
@@ -44,15 +45,14 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 
 
 # list of programs run by "make check" and "make distcheck"
-#TESTS = test_all
+TESTS = testqa
 #orc stuff gets built in the ORC directory conditional to ORC being enabled.
 #it gets linked in during the build of libvolk as an added library.
 #there might be a better way to do this.
 
 lib_LTLIBRARIES = \
 	libvolk.la \
-	libvolk_runtime.la \
-	libvolk_qa.la
+	libvolk_runtime.la
 
 EXTRA_DIST = \
 	volk_mktables.c		\
@@ -154,101 +154,15 @@ endif
 # ----------------------------------------------------------------
 #        The QA library.  Note libvolk.la in LIBADD
 # ----------------------------------------------------------------
-libvolk_qa_la_SOURCES = \
-	qa_volk.cc \
-	qa_16s_quad_max_star_aligned16.cc \
-	qa_32fc_dot_prod_aligned16.cc \
-	qa_32fc_square_dist_aligned16.cc \
-	qa_32fc_square_dist_scalar_mult_aligned16.cc \
-	qa_32f_sum_of_poly_aligned16.cc \
-	qa_32fc_index_max_aligned16.cc \
-	qa_32f_index_max_aligned16.cc \
-	qa_32fc_conjugate_dot_prod_aligned16.cc \
-	qa_16s_permute_and_scalar_add_aligned16.cc \
-	qa_16s_branch_4_state_8_aligned16.cc \
-	qa_16s_max_star_horizontal_aligned16.cc \
-	qa_16s_max_star_aligned16.cc \
-	qa_16s_add_quad_aligned16.cc \
-	qa_32f_add_aligned16.cc \
-	qa_32f_subtract_aligned16.cc \
-	qa_32f_max_aligned16.cc \
-	qa_32f_min_aligned16.cc \
-	qa_64f_max_aligned16.cc \
-	qa_64f_min_aligned16.cc \
-	qa_32s_and_aligned16.cc \
-	qa_32s_or_aligned16.cc \
-	qa_32f_dot_prod_aligned16.cc \
-	qa_32f_dot_prod_unaligned16.cc \
-	qa_32f_fm_detect_aligned16.cc \
-	qa_32fc_multiply_aligned16.cc \
-	qa_32f_divide_aligned16.cc \
-	qa_32f_multiply_aligned16.cc \
-	qa_32f_sqrt_aligned16.cc \
-	qa_8sc_multiply_conjugate_16sc_aligned16.cc \
-	qa_8sc_multiply_conjugate_32fc_aligned16.cc \
-	qa_32u_popcnt_aligned16.cc \
-	qa_64u_popcnt_aligned16.cc \
-	qa_64u_byteswap_aligned16.cc \
-	qa_8sc_deinterleave_32f_aligned16.cc \
-	qa_16sc_deinterleave_32f_aligned16.cc \
-	qa_8sc_deinterleave_16s_aligned16.cc \
-	qa_32f_interleave_32fc_aligned16.cc \
-	qa_16u_byteswap_aligned16.cc \
-	qa_16sc_deinterleave_16s_aligned16.cc \
-	qa_32fc_deinterleave_real_32f_aligned16.cc \
-	qa_32fc_magnitude_32f_aligned16.cc \
-	qa_32fc_deinterleave_real_64f_aligned16.cc \
-	qa_32fc_deinterleave_real_16s_aligned16.cc \
-	qa_32fc_magnitude_16s_aligned16.cc \
-	qa_32fc_deinterleave_32f_aligned16.cc \
-	qa_8sc_deinterleave_real_8s_aligned16.cc \
-	qa_32fc_deinterleave_64f_aligned16.cc \
-	qa_32f_interleave_16sc_aligned16.cc \
-	qa_16sc_deinterleave_real_8s_aligned16.cc \
-	qa_16sc_deinterleave_real_32f_aligned16.cc \
-	qa_16sc_magnitude_32f_aligned16.cc \
-	qa_32u_byteswap_aligned16.cc \
-	qa_16sc_deinterleave_real_16s_aligned16.cc \
-	qa_8sc_deinterleave_real_32f_aligned16.cc \
-	qa_16sc_magnitude_16s_aligned16.cc \
-	qa_32f_normalize_aligned16.cc \
-	qa_8sc_deinterleave_real_16s_aligned16.cc \
-	qa_16s_convert_32f_aligned16.cc \
-	qa_16s_convert_32f_unaligned16.cc \
-	qa_16s_convert_8s_aligned16.cc \
-	qa_16s_convert_8s_unaligned16.cc \
-	qa_32f_convert_16s_aligned16.cc \
-	qa_32f_convert_16s_unaligned16.cc \
-	qa_32f_convert_32s_aligned16.cc \
-	qa_32f_convert_32s_unaligned16.cc \
-	qa_32f_convert_64f_aligned16.cc \
-	qa_32f_convert_64f_unaligned16.cc \
-	qa_32f_convert_8s_aligned16.cc \
-	qa_32f_convert_8s_unaligned16.cc \
-	qa_32s_convert_32f_aligned16.cc \
-	qa_32s_convert_32f_unaligned16.cc \
-	qa_64f_convert_32f_aligned16.cc \
-	qa_64f_convert_32f_unaligned16.cc \
-	qa_8s_convert_16s_aligned16.cc \
-	qa_8s_convert_16s_unaligned16.cc \
-	qa_8s_convert_32f_aligned16.cc \
-	qa_8s_convert_32f_unaligned16.cc \
-	qa_32fc_32f_power_32fc_aligned16.cc \
-	qa_32f_power_aligned16.cc \
-	qa_32fc_atan2_32f_aligned16.cc \
-	qa_32fc_power_spectral_density_32f_aligned16.cc \
-	qa_32fc_power_spectrum_32f_aligned16.cc \
-	qa_32f_calc_spectral_noise_floor_aligned16.cc \
-	qa_32f_accumulator_aligned16.cc \
-	qa_32f_stddev_aligned16.cc \
-	qa_32f_stddev_and_mean_aligned16.cc
-
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-
-libvolk_qa_la_LIBADD = \
-	libvolk.la \
-	libvolk_runtime.la \
-	$(CPPUNIT_LIBS)
+#libvolk_qa_la_SOURCES = \
+#	qa_utils.cc
+
+#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost
+
+#libvolk_qa_la_LIBADD = \
+#	libvolk.la \
+#	libvolk_runtime.la
+	
 
 # ----------------------------------------------------------------
 # headers that don't get installed
@@ -257,104 +171,20 @@ noinst_HEADERS = \
 	volk_init.h \
 	qa_volk.h \
 	qa_utils.h \
-	assembly.h \
-	qa_16s_quad_max_star_aligned16.h \
-	qa_32fc_dot_prod_aligned16.h \
-	qa_32fc_square_dist_aligned16.h \
-	qa_32fc_square_dist_scalar_mult_aligned16.h \
-	qa_32f_sum_of_poly_aligned16.h \
-	qa_32fc_index_max_aligned16.h \
-	qa_32f_index_max_aligned16.h \
-	qa_32fc_conjugate_dot_prod_aligned16.h \
-	qa_16s_permute_and_scalar_add_aligned16.h \
-	qa_16s_branch_4_state_8_aligned16.h \
-	qa_16s_max_star_horizontal_aligned16.h \
-	qa_16s_max_star_aligned16.h \
-	qa_16s_add_quad_aligned16.h \
-	qa_32f_add_aligned16.h \
-	qa_32f_subtract_aligned16.h \
-	qa_32f_max_aligned16.h \
-	qa_32f_min_aligned16.h \
-	qa_64f_max_aligned16.h \
-	qa_64f_min_aligned16.h \
-	qa_32s_and_aligned16.h \
-	qa_32s_or_aligned16.h \
-	qa_32f_dot_prod_aligned16.h \
-	qa_32f_dot_prod_unaligned16.h \
-	qa_32f_fm_detect_aligned16.h \
-	qa_32fc_32f_multiply_aligned16.h \
-	qa_32fc_multiply_aligned16.h \
-	qa_32f_divide_aligned16.h \
-	qa_32f_multiply_aligned16.h \
-	qa_32f_sqrt_aligned16.h \
-	qa_8sc_multiply_conjugate_16sc_aligned16.h \
-	qa_8sc_multiply_conjugate_32fc_aligned16.h \
-	qa_32u_popcnt_aligned16.h \
-	qa_64u_popcnt_aligned16.h \
-	qa_64u_byteswap_aligned16.h \
-	qa_8sc_deinterleave_32f_aligned16.h \
-	qa_16sc_deinterleave_32f_aligned16.h \
-	qa_8sc_deinterleave_16s_aligned16.h \
-	qa_32f_interleave_32fc_aligned16.h \
-	qa_16u_byteswap_aligned16.h \
-	qa_16sc_deinterleave_16s_aligned16.h \
-	qa_32fc_deinterleave_real_32f_aligned16.h \
-	qa_32fc_magnitude_32f_aligned16.h \
-	qa_32fc_deinterleave_real_64f_aligned16.h \
-	qa_32fc_deinterleave_real_16s_aligned16.h \
-	qa_32fc_magnitude_16s_aligned16.h \
-	qa_32fc_deinterleave_32f_aligned16.h \
-	qa_8sc_deinterleave_real_8s_aligned16.h \
-	qa_32fc_deinterleave_64f_aligned16.h \
-	qa_32f_interleave_16sc_aligned16.h \
-	qa_16sc_deinterleave_real_8s_aligned16.h \
-	qa_16sc_deinterleave_real_32f_aligned16.h \
-	qa_16sc_magnitude_32f_aligned16.h \
-	qa_32u_byteswap_aligned16.h \
-	qa_16sc_deinterleave_real_16s_aligned16.h \
-	qa_8sc_deinterleave_real_32f_aligned16.h \
-	qa_16sc_magnitude_16s_aligned16.h \
-	qa_32f_normalize_aligned16.h \
-	qa_8sc_deinterleave_real_16s_aligned16.h \
-	qa_16s_convert_32f_aligned16.h \
-	qa_16s_convert_32f_unaligned16.h \
-	qa_16s_convert_8s_aligned16.h \
-	qa_16s_convert_8s_unaligned16.h \
-	qa_32f_convert_16s_aligned16.h \
-	qa_32f_convert_16s_unaligned16.h \
-	qa_32f_convert_32s_aligned16.h \
-	qa_32f_convert_32s_unaligned16.h \
-	qa_32f_convert_64f_aligned16.h \
-	qa_32f_convert_64f_unaligned16.h \
-	qa_32f_convert_8s_aligned16.h \
-	qa_32f_convert_8s_unaligned16.h \
-	qa_32s_convert_32f_aligned16.h \
-	qa_32s_convert_32f_unaligned16.h \
-	qa_64f_convert_32f_aligned16.h \
-	qa_64f_convert_32f_unaligned16.h \
-	qa_8s_convert_16s_aligned16.h \
-	qa_8s_convert_16s_unaligned16.h \
-	qa_8s_convert_32f_aligned16.h \
-	qa_8s_convert_32f_unaligned16.h \
-	qa_32fc_32f_power_32fc_aligned16.h \
-	qa_32f_power_aligned16.h \
-	qa_32fc_atan2_32f_aligned16.h \
-	qa_32fc_power_spectral_density_32f_aligned16.h \
-	qa_32fc_power_spectrum_32f_aligned16.h \
-	qa_32f_calc_spectral_noise_floor_aligned16.h \
-	qa_32f_accumulator_aligned16.h \
-	qa_32f_stddev_aligned16.h \
-	qa_32f_stddev_and_mean_aligned16.h
-
+	assembly.h
 
 # ----------------------------------------------------------------
 # Our test program
 # ----------------------------------------------------------------
 noinst_PROGRAMS = \
-	test_all
+	testqa
 
-test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la
+testqa_SOURCES = testqa.cc qa_utils.cc
+testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
+testqa_LDFLAGS = -lboost_unit_test_framework
+testqa_LDADD  = \
+	libvolk.la \
+	libvolk_runtime.la
 
 
 distclean-local: 
-- 
cgit 


From d486ff4b4c039c8b3b06b6519839d522cf69be69 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Sun, 16 Jan 2011 14:03:16 -0800
Subject: volk_rename: renamed basically everything in the volk lib to have
 logically consistent function names

---
 volk/lib/Makefile.am |  3 ++-
 volk/lib/qa_utils.cc | 53 ++++++++++++++++++++++++++++++++++++----------------
 volk/lib/qa_utils.h  |  2 +-
 3 files changed, 40 insertions(+), 18 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 5c995148a..f609f5bf9 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -184,7 +184,8 @@ testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
 testqa_LDFLAGS = -lboost_unit_test_framework
 testqa_LDADD  = \
 	libvolk.la \
-	libvolk_runtime.la
+	libvolk_runtime.la \
+	../orc/libvolk_orc.la
 
 
 distclean-local: 
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index fa21db487..a8c00c143 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -110,7 +110,11 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
 }
 
 static bool is_valid_type(std::string type) {
-    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u");
+    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")
+                                                                 ("32s")("32u")("16sc")("16s")
+                                                                 ("16u")("8s")("8sc")("8u")
+                                                                 ("s32f")("s16u")("s16s")("s8u")
+                                                                 ("s8s");
     
     BOOST_FOREACH(std::string this_type, valid_types) {
         if(type == this_type) return true;
@@ -148,17 +152,11 @@ static void get_function_signature(std::vector<std::string> &inputsig,
         pos++;
     }
     
-    //if there's no output sig and only one input sig, assume there are 2 inputs
-    //this handles conversion fn's (which have a specified output sig) and most of the rest
-    if(outputsig.size() == 0 && inputsig.size() == 1) {
-        outputsig.push_back(inputsig[0]);
-        inputsig.push_back(inputsig[0]);
-    }//if there's no explicit output sig then assume the output is the same as the first input
-    else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
-    
-    
     assert(inputsig.size() != 0);
-    assert(outputsig.size() != 0);
+}
+
+inline void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buff, vlen, arch.c_str());
 }
 
 inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
@@ -190,26 +188,42 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl;
     for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl;
     
-    //now that we have that, we'll set up input and output buffers based on the function signature
+    //now that we have that, we'll set up input buffers based on the function signature
     std::vector<void *> inbuffs;
     make_buffer_for_signature(inbuffs, inputsig, vlen);
     
+    //allocate output buffers -- one for each output for each arch
+    std::vector<void *> outbuffs;
+    BOOST_FOREACH(std::string arch, arch_list) {
+        make_buffer_for_signature(outbuffs, outputsig, vlen);
+    }
+
     //and set the input buffers to something random
     for(int i=0; i<inputsig.size(); i++) {
         load_random_data(inbuffs[i], inputsig[i], vlen);        
     }
     
-    //allocate output buffers -- one for each output for each arch
-    std::vector<void *> outbuffs;
-    BOOST_FOREACH(std::string arch, arch_list) {
-        make_buffer_for_signature(outbuffs, outputsig, vlen);
+    //so let's see here. if the operation has no output sig, it operates in place,
+    //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers.
+    if(outputsig.size() == 0) {
+        //make a set of output buffers according to the input signature
+        BOOST_FOREACH(std::string arch, arch_list) {
+            make_buffer_for_signature(outbuffs, inputsig, vlen);
+        }
+        //copy input buffer[0] to all the output buffers so it has something to operate on
+        //output buffer element size is the same as input buffer[0]
+        if(
     }
+        
     
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
         start = clock();
         switch(outputsig.size()+inputsig.size()) {
+            case 1:
+                run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                break;
             case 2:
                 run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 break;
@@ -262,6 +276,13 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                         return 1;
                     }
                 }
+            } else if(outputsig[0] == "8s" || outputsig[0] == "8u") {
+                for(int j=0; j<vlen; j++) {
+                    if(((uint8_t *)(outbuffs[generic_offset]))[j] != ((uint8_t *)(outbuffs[i]))[j]) {
+                        std::cout << "Generic: " << ((uint8_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint8_t *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
             } else { 
                 std::cout << "Error: invalid type " << outputsig[0] << std::endl;
                 return 1;
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index f81d652fb..00883bf8e 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -3,7 +3,6 @@
 
 #include <stdlib.h>
 #include <string>
-#include <volk/volk.h>
 
 float uniform(void);
 void random_floats(float *buf, unsigned n);
@@ -12,6 +11,7 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
 
 #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
 
+typedef void (*volk_fn_1arg)(void *, unsigned int, const char*);
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
-- 
cgit 


From be1b7d9ffb90aa9c750e6c6793f00dbc8bec486d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 19 Jan 2011 16:39:28 -0800
Subject: Volk: test suite supports scalar arguments and in-place operations

---
 volk/lib/qa_utils.cc | 357 +++++++++++++++++++++++++++++++--------------------
 volk/lib/qa_utils.h  |  15 ++-
 2 files changed, 231 insertions(+), 141 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index a8c00c143..e73b70985 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -7,7 +7,8 @@
 #include <iostream>
 #include <vector>
 #include <time.h>
-//#include <math.h>
+#include <math.h>
+#include <boost/lexical_cast.hpp>
 //#include <volk/volk_runtime.h>
 #include <volk/volk_registry.h>
 #include <volk/volk.h>
@@ -24,44 +25,53 @@ void random_floats (float *buf, unsigned n)
     buf[i] = uniform ();
 }
 
-void load_random_data(void *data, std::string sig, unsigned int n) {
-    if(sig == "32fc") {
-        random_floats((float *)data, n*2);
-    } else if(sig == "32f") {
+void load_random_data(void *data, volk_type_t type, unsigned int n) {
+    if(type.is_complex) n *= 2;
+    if(type.is_float) {
+        assert(type.size == 4); //TODO: double support
         random_floats((float *)data, n);
-    } else if(sig == "32u") {
-        for(int i=0; i<n; i++) ((uint32_t *)data)[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-    } else if(sig == "32s") {
-        for(int i=0; i<n; i++) ((int32_t *)data)[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    } else if(sig == "16u") {
-        for(int i=0; i<n; i++) ((uint16_t *)data)[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-    } else if(sig == "16s") {
-        for(int i=0; i<n; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-    } else if(sig == "16sc") {
-        for(int i=0; i<n*2; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-    } else if(sig == "8u") {
-        for(int i=0; i<n; i++) ((uint8_t *)data)[i] = ((uint8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 256.0));
-    } else if(sig == "8s") {
-        for(int i=0; i<n; i++) ((int8_t *)data)[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-    } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl;
+    } else {
+        float int_max = pow(2, type.size*8);
+        if(type.is_signed) int_max /= 2.0;
+        for(int i=0; i<n; i++) {
+            float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
+            //man i really don't know how to do this in a more clever way, you have to cast down at some point
+            switch(type.size) {
+            case 8:
+                if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
+                else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
+            break;
+            case 4:
+                if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
+                else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
+            break;           
+            case 2:
+                if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
+                else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
+            break;
+            case 1:
+                if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
+                else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
+            break;
+            default:
+                throw; //no shenanigans here
+            }
+        }
+    }
 }
 
-template <class t>
-t *make_aligned_buffer(unsigned int len) {
-  t *buf;
+void *make_aligned_buffer(unsigned int len, unsigned int size) {
+  void *buf;
   int ret;
-  ret = posix_memalign((void**)&buf, 16, len * sizeof(t));
+  ret = posix_memalign((void**)&buf, 16, len * size);
   assert(ret == 0);
   return buf;
 }
 
-void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) {
-    BOOST_FOREACH(std::string sig, inputsig) {
-        if     (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer<uint64_t>(vlen));
-        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<uint32_t>(vlen));
-        else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer<uint16_t>(vlen));
-        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<uint8_t>(vlen));
-        else std::cout << "Invalid type: " << sig << std::endl;
+void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) {
+    BOOST_FOREACH(volk_type_t sig, inputsig) {
+        if(!sig.is_scalar) //we don't make buffers for scalars
+          buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1)));
     }
 }
 
@@ -109,22 +119,56 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
     return archlist;
 }
 
-static bool is_valid_type(std::string type) {
-    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")
-                                                                 ("32s")("32u")("16sc")("16s")
-                                                                 ("16u")("8s")("8sc")("8u")
-                                                                 ("s32f")("s16u")("s16s")("s8u")
-                                                                 ("s8s");
+volk_type_t volk_type_from_string(std::string name) {
+    volk_type_t type;
+    type.is_float = false;
+    type.is_scalar = false;
+    type.is_complex = false;
+    type.is_signed = false;
+    type.size = 0;
+    type.str = name;
+    
+    assert(name.size() > 1);
     
-    BOOST_FOREACH(std::string this_type, valid_types) {
-        if(type == this_type) return true;
+    //is it a scalar?
+    if(name[0] == 's') { 
+        type.is_scalar = true;
+        name = name.substr(1, name.size()-1);
+    }
+    
+    //get the data size
+    int last_size_pos = name.find_last_of("0123456789");
+    if(last_size_pos < 0) throw 0;
+    //will throw if malformed
+    int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
+
+    assert(((size % 8) == 0) && (size <= 64) && (size != 0));
+    type.size = size/8; //in bytes
+    
+    for(int i=last_size_pos+1; i < name.size(); i++) {
+        switch (name[i]) {
+        case 'f':
+            type.is_float = true;
+            break;
+        case 'i':
+            type.is_signed = true;
+            break;
+        case 'c':
+            type.is_complex = true;
+            break;
+        case 'u':
+            type.is_signed = false;
+            break;
+        default:
+            throw;
+        }
     }
-    return false;
-}
     
+    return type;
+}
 
-static void get_function_signature(std::vector<std::string> &inputsig, 
-                                   std::vector<std::string> &outputsig, 
+static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, 
+                                   std::vector<volk_type_t> &outputsig, 
                                    std::string name) {
     boost::char_separator<char> sep("_");
     boost::tokenizer<boost::char_separator<char> > tok(name, sep);
@@ -133,25 +177,38 @@ static void get_function_signature(std::vector<std::string> &inputsig,
     toked.assign(tok.begin(), tok.end());
     
     assert(toked[0] == "volk");
-    
-    inputsig.push_back(toked[1]); //mandatory
-    int pos = 2;
-    bool valid_type = true;
-    while(valid_type && pos < toked.size()) {
-        if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]);
-        else valid_type = false;
-        pos++;
-    }
-    while(!valid_type && pos < toked.size()) {
-        if(is_valid_type(toked[pos])) valid_type = true;
-        else pos++;
-    }
-    while(valid_type && pos < toked.size()) {
-        if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]);
-        else valid_type = false;
-        pos++;
+    toked.erase(toked.begin());
+
+    //ok. we're assuming a string in the form
+    //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
+
+    enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT;
+    std::string fn_name;
+    volk_type_t type;
+    BOOST_FOREACH(std::string token, toked) {
+        try {
+            type = volk_type_from_string(token);
+            if(side == SIDE_INPUT) inputsig.push_back(type);
+            else outputsig.push_back(type);
+        } catch (...){
+            if(token[0] == 'x') { //it's a multiplier
+                if(side == SIDE_INPUT) assert(inputsig.size() > 0);
+                else assert(outputsig.size() > 0);
+                int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
+                for(int i=1; i<multiplier; i++) {
+                    if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
+                    else outputsig.push_back(outputsig.back());
+                }
+            }
+            else if(side == SIDE_INPUT) { //it's the function name, at least it better be
+                side = SIDE_OUTPUT;
+                fn_name = token;
+            } else {
+                if(token != toked.back()) throw; //the last token in the name is the alignment
+            }
+        }
     }
-    
+    //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
     assert(inputsig.size() != 0);
 }
 
@@ -171,61 +228,98 @@ inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *>
     while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str());
 }
 
+inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buff, scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str());
+}
+
+template <class t>
+bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    for(int i=0; i<vlen; i++) {
+        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) return 1;
+    }
+    return 0;
+}
+
+template <class t>
+bool icompare(t *in1, t *in2, unsigned int vlen) {
+    for(int i=0; i<vlen; i++) {
+        if(((t *)(in1))[i] != ((t *)(in2))[i]) return 1;
+    }
+    return 0;
+}
+
 bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
     std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
     
     //first let's get a list of available architectures for the test
     std::vector<std::string> arch_list = get_arch_list(archs);
     
-    BOOST_FOREACH(std::string arch, arch_list) {
-        std::cout << "Found an arch: " << arch << std::endl;
-    }
-    
     //now we have to get a function signature by parsing the name
-    std::vector<std::string> inputsig, outputsig;
-    get_function_signature(inputsig, outputsig, name);
-
-    for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl;
-    for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl;
-    
-    //now that we have that, we'll set up input buffers based on the function signature
-    std::vector<void *> inbuffs;
-    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    std::vector<volk_type_t> inputsig, outputsig;
+    get_signatures_from_name(inputsig, outputsig, name);
     
-    //allocate output buffers -- one for each output for each arch
-    std::vector<void *> outbuffs;
-    BOOST_FOREACH(std::string arch, arch_list) {
-        make_buffer_for_signature(outbuffs, outputsig, vlen);
-    }
-
-    //and set the input buffers to something random
+    std::vector<volk_type_t> inputsc, outputsc;
     for(int i=0; i<inputsig.size(); i++) {
-        load_random_data(inbuffs[i], inputsig[i], vlen);        
+        if(inputsig[i].is_scalar) {
+            inputsc.push_back(inputsig[i]);
+            inputsig.erase(inputsig.begin() + i);
+        }
     }
+    for(int i=0; i<outputsig.size(); i++) {
+        if(outputsig[i].is_scalar) {
+            outputsc.push_back(outputsig[i]);
+            outputsig.erase(outputsig.begin() + i);
+        }
+    }
+    assert(outputsc.size() == 0); //we don't do output scalars yet
+
+    //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
+    //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
+    std::vector<void *> inbuffs, outbuffs;
     
-    //so let's see here. if the operation has no output sig, it operates in place,
-    //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers.
-    if(outputsig.size() == 0) {
-        //make a set of output buffers according to the input signature
-        BOOST_FOREACH(std::string arch, arch_list) {
+    if(outputsig.size() == 0) { //we're operating in place...
+        //assert(inputsig.size() == 1); //we only support 0 output 1 input right now...
+        make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer
+        load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data
+        BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer
             make_buffer_for_signature(outbuffs, inputsig, vlen);
+            memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1));
+        }
+    } else {
+        make_buffer_for_signature(inbuffs, inputsig, vlen);
+        BOOST_FOREACH(std::string arch, arch_list) {
+            make_buffer_for_signature(outbuffs, outputsig, vlen);
+        }
+    
+        //and set the input buffers to something random
+        for(int i=0; i<inbuffs.size(); i++) {
+            load_random_data(inbuffs[i], inputsig[i], vlen);        
         }
-        //copy input buffer[0] to all the output buffers so it has something to operate on
-        //output buffer element size is the same as input buffer[0]
-        if(
     }
-        
     
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
         start = clock();
-        switch(outputsig.size()+inputsig.size()) {
+
+        switch(inputsig.size() + outputsig.size()) {
             case 1:
-                run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                if(inputsc.size() == 0) {
+                    run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]);
+                } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
-                run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                if(inputsc.size() == 0) {
+                    run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]);
+                } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
@@ -234,69 +328,52 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 break;
             default:
+                throw "no function handler for this signature";
                 break;
         }
+        
         end = clock();
         std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
     }
-
     //and now compare each output to the generic output
     //first we have to know which output is the generic one, they aren't in order...
     int generic_offset;
     for(int i=0; i<arch_list.size(); i++) 
         if(arch_list[i] == "generic") generic_offset=i;
-    
+        
+    //now compare
+    if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
+
+    bool fail = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
-            if(outputsig[0] == "32fc") {
-                for(int j=0; j<vlen*2; j++) {
-                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
-                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "32f") {
-                for(int j=0; j<vlen; j++) {
-                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
-                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") {
-                for(int j=0; j<vlen; j++) {
-                    if(((uint32_t *)(outbuffs[generic_offset]))[j] != ((uint32_t *)(outbuffs[i]))[j]) {
-                        std::cout << "Generic: " << ((uint32_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint32_t *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "16u" || outputsig[0] == "16s" || outputsig[0] == "8sc") {
-                for(int j=0; j<vlen; j++) {
-                    if(((uint16_t *)(outbuffs[generic_offset]))[j] != ((uint16_t *)(outbuffs[i]))[j]) {
-                        std::cout << "Generic: " << ((uint16_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint16_t *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "8s" || outputsig[0] == "8u") {
-                for(int j=0; j<vlen; j++) {
-                    if(((uint8_t *)(outbuffs[generic_offset]))[j] != ((uint8_t *)(outbuffs[i]))[j]) {
-                        std::cout << "Generic: " << ((uint8_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint8_t *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
+            if(outputsig[0].str == "32fc") {
+                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*2, tol);
+            } else if(outputsig[0].str == "32f") {
+                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen, tol);
+            } else if(outputsig[0].str == "32u" || outputsig[0].str == "32s" || outputsig[0].str == "16sc") {
+                fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen);
+            } else if(outputsig[0].size == 2) {
+                fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen);
+            } else if(outputsig[0].size == 1) {
+                fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen);
             } else { 
-                std::cout << "Error: invalid type " << outputsig[0] << std::endl;
-                return 1;
+                std::cout << "Error: invalid type " << outputsig[0].str << std::endl;
+                fail = true;
+            }
+            if(fail) {
+                std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
             }
         }
     }
 
-    BOOST_FOREACH(void *buf, inbuffs) {
-        free(buf);
-    }
-    BOOST_FOREACH(void *buf, outbuffs) {
-        free(buf);
-    }
-    return 0;
+//    BOOST_FOREACH(void *buf, inbuffs) {
+//        free(buf);
+//    }
+//    BOOST_FOREACH(void *buf, outbuffs) {
+//        free(buf);
+//    }
+    return fail;
 }
 
 
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 00883bf8e..79c5d7778 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -4,6 +4,17 @@
 #include <stdlib.h>
 #include <string>
 
+struct volk_type_t {
+    bool is_float;
+    bool is_scalar;
+    bool is_signed;
+    bool is_complex;
+    int size;
+    std::string str;
+};
+
+volk_type_t volk_type_from_string(std::string);
+
 float uniform(void);
 void random_floats(float *buf, unsigned n);
 
@@ -11,9 +22,11 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
 
 #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
 
-typedef void (*volk_fn_1arg)(void *, unsigned int, const char*);
+typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
+typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
+typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
 
 #endif //VOLK_QA_UTILS_H
-- 
cgit 


From e3600f59e76c3dc08aedfd77629b7c5c48df86af Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Thu, 20 Jan 2011 16:30:09 -0800
Subject: volk: renamed all files. added all tests. some test things are still
 broken.

---
 volk/lib/qa_utils.cc | 101 +++++++++++++++++++++++++++++++--------------------
 volk/lib/qa_utils.h  |   1 +
 2 files changed, 62 insertions(+), 40 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index e73b70985..4c151bd6f 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -19,7 +19,8 @@ float uniform() {
   return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
 }
 
-void random_floats (float *buf, unsigned n)
+template <class t>
+void random_floats (t *buf, unsigned n)
 {
   for (unsigned i = 0; i < n; i++)
     buf[i] = uniform ();
@@ -28,8 +29,8 @@ void random_floats (float *buf, unsigned n)
 void load_random_data(void *data, volk_type_t type, unsigned int n) {
     if(type.is_complex) n *= 2;
     if(type.is_float) {
-        assert(type.size == 4); //TODO: double support
-        random_floats((float *)data, n);
+        if(type.size == 8) random_floats<double>((double *)data, n);
+        else random_floats<float>((float *)data, n);
     } else {
         float int_max = pow(2, type.size*8);
         if(type.is_signed) int_max /= 2.0;
@@ -54,7 +55,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
                 else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
             break;
             default:
-                throw; //no shenanigans here
+                throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
             }
         }
     }
@@ -94,6 +95,9 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
         case (1<<LV_SSE2):
             archlist.push_back("sse2");
             break;
+        case (1<<LV_SSE3):
+            archlist.push_back("sse3");
+            break;
         case (1<<LV_SSSE3):
             archlist.push_back("ssse3");
             break;
@@ -128,7 +132,7 @@ volk_type_t volk_type_from_string(std::string name) {
     type.size = 0;
     type.str = name;
     
-    assert(name.size() > 1);
+    if(name.size() < 2) throw std::string("name too short to be a datatype");
     
     //is it a scalar?
     if(name[0] == 's') { 
@@ -138,7 +142,7 @@ volk_type_t volk_type_from_string(std::string name) {
     
     //get the data size
     int last_size_pos = name.find_last_of("0123456789");
-    if(last_size_pos < 0) throw 0;
+    if(last_size_pos < 0) throw std::string("no size spec in type ").append(name);
     //will throw if malformed
     int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
 
@@ -182,12 +186,14 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
     //ok. we're assuming a string in the form
     //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
 
-    enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT;
+    enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
     std::string fn_name;
     volk_type_t type;
     BOOST_FOREACH(std::string token, toked) {
         try {
             type = volk_type_from_string(token);
+            if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
+            
             if(side == SIDE_INPUT) inputsig.push_back(type);
             else outputsig.push_back(type);
         } catch (...){
@@ -201,9 +207,11 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
                 }
             }
             else if(side == SIDE_INPUT) { //it's the function name, at least it better be
-                side = SIDE_OUTPUT;
-                fn_name = token;
-            } else {
+                side = SIDE_NAME;
+                fn_name.append("_");
+                fn_name.append(token);
+            } 
+            else if(side == SIDE_OUTPUT) {
                 if(token != toked.back()) throw; //the last token in the name is the alignment
             }
         }
@@ -236,20 +244,40 @@ inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vect
     while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str());
 }
 
+inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str());
+}
+
 template <class t>
 bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    bool fail = false;
+    int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) return 1;
+        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
+            fail=true;
+            if(print_max_errs-- > 0) {
+                std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl;
+            }
+        }
     }
-    return 0;
+    
+    return fail;
 }
 
 template <class t>
-bool icompare(t *in1, t *in2, unsigned int vlen) {
+bool icompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    bool fail = false;
+    int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(((t *)(in1))[i] != ((t *)(in2))[i]) return 1;
+        if(((t *)(in1))[i] != ((t *)(in2))[i]) {
+            fail=true;
+            if(print_max_errs-- > 0) {
+                std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl;
+            }
+        }
     }
-    return 0;
+    
+    return fail;
 }
 
 bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
@@ -300,7 +328,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
             load_random_data(inbuffs[i], inputsig[i], vlen);        
         }
     }
-    
+
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
@@ -311,18 +339,22 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
-                run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                if(inputsc.size() == 0) {
+                    run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
+                } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
                 run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
@@ -337,29 +369,24 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     }
     //and now compare each output to the generic output
     //first we have to know which output is the generic one, they aren't in order...
-    int generic_offset;
+    int generic_offset=0;
     for(int i=0; i<arch_list.size(); i++) 
         if(arch_list[i] == "generic") generic_offset=i;
-        
+
     //now compare
     if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
 
     bool fail = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
-            if(outputsig[0].str == "32fc") {
-                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*2, tol);
-            } else if(outputsig[0].str == "32f") {
-                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen, tol);
-            } else if(outputsig[0].str == "32u" || outputsig[0].str == "32s" || outputsig[0].str == "16sc") {
-                fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen);
-            } else if(outputsig[0].size == 2) {
-                fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen);
-            } else if(outputsig[0].size == 1) {
-                fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen);
-            } else { 
-                std::cout << "Error: invalid type " << outputsig[0].str << std::endl;
-                fail = true;
+            if(outputsig[0].is_float) {
+                if(outputsig[0].size == 8) {
+                    fail = fcompare((double *) outbuffs[generic_offset], (double *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                } else {
+                    fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                }
+            } else {
+                fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
             }
             if(fail) {
                 std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
@@ -367,12 +394,6 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         }
     }
 
-//    BOOST_FOREACH(void *buf, inbuffs) {
-//        free(buf);
-//    }
-//    BOOST_FOREACH(void *buf, outbuffs) {
-//        free(buf);
-//    }
     return fail;
 }
 
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 79c5d7778..79fc8f006 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -28,5 +28,6 @@ typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
 typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
+typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*);
 
 #endif //VOLK_QA_UTILS_H
-- 
cgit 


From 82cafc4381e48ccc9423d2dc88720e5c1347d940 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 12:26:52 -0800
Subject: Volk: fixed naming error. test coverage @ 75%, still need to add
 support for multiple outputs in the checker. some errors in the library were
 exposed by the new test suite, and a couple of bad Orc functions. need to
 investigate.

---
 volk/lib/qa_utils.cc | 51 +++++++++++++++++++++++++++++++++++++++++++--------
 volk/lib/testqa.cc   | 34 ++++++++++++++++------------------
 2 files changed, 59 insertions(+), 26 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 4c151bd6f..8f57a9b90 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -253,6 +253,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
     bool fail = false;
     int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
+        if(((t *)(in1))[i] < 1e-30) continue; //below around here we'll start to get roundoff errors due to float precision
         if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
             fail=true;
             if(print_max_errs-- > 0) {
@@ -265,14 +266,14 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
 }
 
 template <class t>
-bool icompare(t *in1, t *in2, unsigned int vlen, float tol) {
+bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
     bool fail = false;
     int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(((t *)(in1))[i] != ((t *)(in2))[i]) {
+        if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) {
             fail=true;
             if(print_max_errs-- > 0) {
-                std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl;
+                std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl;
             }
         }
     }
@@ -339,21 +340,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
                     run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
@@ -375,7 +376,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
 
     //now compare
     if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-
+    //TODO: loop over the output signature as well
     bool fail = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
@@ -386,7 +387,41 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                     fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
                 }
             } else {
-                fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
+                //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
+                switch(outputsig[0].size) {
+                case 8:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int64_t *) outbuffs[generic_offset], (int64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint64_t *) outbuffs[generic_offset], (uint64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                case 4:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int32_t *) outbuffs[generic_offset], (int32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                case 2:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int16_t *) outbuffs[generic_offset], (int16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                case 1:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int8_t *) outbuffs[generic_offset], (int8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                default:
+                    fail=1;
+                }
+                    
+                //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
             }
             if(fail) {
                 std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 1ee264fb4..f813e843f 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -11,18 +11,16 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
 //    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000);
@@ -37,7 +35,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000);
@@ -45,25 +43,25 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 1e-4, 2046, 10000);
+    //VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
+    //VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000);
-- 
cgit 


From 7a5a751073cc1583533b84c90ecc985b3669a696 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 15:14:26 -0800
Subject: Volk: added QA support for multiple outputs, scalar outputs. 92% test
 coverage within the framework.

---
 volk/lib/qa_utils.cc | 183 +++++++++++++++++++++++++--------------------------
 volk/lib/testqa.cc   |  42 ++++++------
 2 files changed, 112 insertions(+), 113 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 8f57a9b90..b1c55fc05 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -66,6 +66,7 @@ void *make_aligned_buffer(unsigned int len, unsigned int size) {
   int ret;
   ret = posix_memalign((void**)&buf, 16, len * size);
   assert(ret == 0);
+  memset(buf, 0x00, len*size);
   return buf;
 }
 
@@ -220,32 +221,32 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
     assert(inputsig.size() != 0);
 }
 
-inline void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buff, vlen, arch.c_str());
+inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], vlen, arch.c_str());
 }
 
-inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str());
+inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
 }
 
-inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str());
+inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
 }
 
-inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str());
+inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
 }
 
-inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buff, scalar, vlen, arch.c_str());
+inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
 }
 
-inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str());
+inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
 }
 
-inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str());
+inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
 }
 
 template <class t>
@@ -253,7 +254,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
     bool fail = false;
     int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(((t *)(in1))[i] < 1e-30) continue; //below around here we'll start to get roundoff errors due to float precision
+        if(((t *)(in1))[i] < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision
         if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
             fail=true;
             if(print_max_errs-- > 0) {
@@ -291,74 +292,70 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     std::vector<volk_type_t> inputsig, outputsig;
     get_signatures_from_name(inputsig, outputsig, name);
     
-    std::vector<volk_type_t> inputsc, outputsc;
+    //pull the input scalars into their own vector
+    std::vector<volk_type_t> inputsc;
     for(int i=0; i<inputsig.size(); i++) {
         if(inputsig[i].is_scalar) {
             inputsc.push_back(inputsig[i]);
             inputsig.erase(inputsig.begin() + i);
         }
     }
-    for(int i=0; i<outputsig.size(); i++) {
-        if(outputsig[i].is_scalar) {
-            outputsc.push_back(outputsig[i]);
-            outputsig.erase(outputsig.begin() + i);
-        }
-    }
-    assert(outputsc.size() == 0); //we don't do output scalars yet
 
     //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
     //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
-    std::vector<void *> inbuffs, outbuffs;
+    std::vector<void *> inbuffs;
+
+    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    for(int i=0; i<inbuffs.size(); i++) {
+        load_random_data(inbuffs[i], inputsig[i], vlen);        
+    }
     
-    if(outputsig.size() == 0) { //we're operating in place...
-        //assert(inputsig.size() == 1); //we only support 0 output 1 input right now...
-        make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer
-        load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data
-        BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer
-            make_buffer_for_signature(outbuffs, inputsig, vlen);
-            memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1));
-        }
-    } else {
-        make_buffer_for_signature(inbuffs, inputsig, vlen);
-        BOOST_FOREACH(std::string arch, arch_list) {
-            make_buffer_for_signature(outbuffs, outputsig, vlen);
+    //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
+    std::vector<std::vector<void *> > test_data;
+    for(int i=0; i<arch_list.size(); i++) {
+        std::vector<void *> arch_buffs;
+        for(int j=0; j<outputsig.size(); j++) {
+            arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
         }
-    
-        //and set the input buffers to something random
-        for(int i=0; i<inbuffs.size(); i++) {
-            load_random_data(inbuffs[i], inputsig[i], vlen);        
+        for(int j=0; j<inputsig.size(); j++) {
+            arch_buffs.push_back(inbuffs[j]);
         }
+        test_data.push_back(arch_buffs);
     }
+    
+    std::vector<volk_type_t> both_sigs;
+    both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
+    both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
 
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
         start = clock();
 
-        switch(inputsig.size() + outputsig.size()) {
+        switch(both_sigs.size()) {
             case 1:
                 if(inputsc.size() == 0) {
-                    run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
-                    run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                    run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
-                    run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                    run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
-                run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 break;
             default:
                 throw "no function handler for this signature";
@@ -375,61 +372,63 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         if(arch_list[i] == "generic") generic_offset=i;
 
     //now compare
-    if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-    //TODO: loop over the output signature as well
+    //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
+    
     bool fail = false;
+    bool fail_global = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
-            if(outputsig[0].is_float) {
-                if(outputsig[0].size == 8) {
-                    fail = fcompare((double *) outbuffs[generic_offset], (double *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                } else {
-                    fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                }
-            } else {
-                //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
-                switch(outputsig[0].size) {
-                case 8:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int64_t *) outbuffs[generic_offset], (int64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    } else {
-                        fail = icompare((uint64_t *) outbuffs[generic_offset], (uint64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    }
-                    break;
-                case 4:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int32_t *) outbuffs[generic_offset], (int32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+            for(int j=0; j<both_sigs.size(); j++) {
+                if(both_sigs[j].is_float) {
+                    if(both_sigs[j].size == 8) {
+                        fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
                     } else {
-                        fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                        fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
                     }
-                    break;
-                case 2:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int16_t *) outbuffs[generic_offset], (int16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    } else {
-                        fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    }
-                    break;
-                case 1:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int8_t *) outbuffs[generic_offset], (int8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    } else {
-                        fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                } else {
+                    //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
+                    switch(both_sigs[j].size) {
+                    case 8:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 4:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 2:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 1:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    default:
+                        fail=1;
                     }
-                    break;
-                default:
-                    fail=1;
                 }
-                    
+                if(fail) {
+                    fail_global = true;
+                    std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
+                }
                 //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
             }
-            if(fail) {
-                std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
-            }
         }
     }
-
-    return fail;
+    return fail_global;
 }
 
 
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f813e843f..4dd7f7599 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -7,13 +7,13 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     //in order...
 //    VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000);
@@ -21,25 +21,25 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16u_byteswap_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 2046, 1000);
-//    VOLK_RUN_TESTS(volk_32f_calc_spectral_noise_floor_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000);
@@ -49,19 +49,19 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000);
-    //VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-    //VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 1, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000);
@@ -84,8 +84,8 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_64u_byteswap_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
-- 
cgit 


From fa8c8c8e9fcd74eda5edb58edc89be97bc4bfa0a Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 15:29:08 -0800
Subject: Volk: added ability to spec scalar in test invocation

---
 volk/lib/qa_utils.cc |   8 +--
 volk/lib/qa_utils.h  |   4 +-
 volk/lib/testqa.cc   | 172 +++++++++++++++++++++++++--------------------------
 3 files changed, 92 insertions(+), 92 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index b1c55fc05..67ce5ddef 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -282,7 +282,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
     return fail;
 }
 
-bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
+bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
     std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
     
     //first let's get a list of available architectures for the test
@@ -337,21 +337,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
                     run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 79fc8f006..e2539060a 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -18,9 +18,9 @@ volk_type_t volk_type_from_string(std::string);
 float uniform(void);
 void random_floats(float *buf, unsigned n);
 
-bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
+bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int);
 
-#define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
+#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0)
 
 typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 4dd7f7599..9f4934dc0 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -7,93 +7,93 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     //in order...
 //    VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16u_byteswap_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 2046, 1000);
-    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
+    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2147483648.0, 2046, 10000); // scalar = 2^31; the former 2<<31 overflows a 32-bit int
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2147483648.0, 2046, 10000); // scalar = 2^31; the former 2<<31 overflows a 32-bit int
+    VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32u_byteswap_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32u_popcnt_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64u_byteswap_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_convert_16i_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
 
 }
-- 
cgit 


From 6091bad60cdfdf21624da452c7a8b74405345070 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 15:41:30 -0800
Subject: Volk: removed all the old QA code that is covered by the test
 framework.

---
 volk/lib/Makefile.am                               |   1 -
 volk/lib/qa_16s_convert_32f_aligned16.cc           |  74 -------
 volk/lib/qa_16s_convert_32f_aligned16.h            |  18 --
 volk/lib/qa_16s_convert_32f_unaligned16.cc         |  74 -------
 volk/lib/qa_16s_convert_32f_unaligned16.h          |  18 --
 volk/lib/qa_16s_convert_8s_aligned16.cc            |  61 ------
 volk/lib/qa_16s_convert_8s_aligned16.h             |  18 --
 volk/lib/qa_16s_convert_8s_unaligned16.cc          |  61 ------
 volk/lib/qa_16s_convert_8s_unaligned16.h           |  18 --
 volk/lib/qa_16s_max_star_aligned16.cc              |  65 -------
 volk/lib/qa_16s_max_star_aligned16.h               |  18 --
 volk/lib/qa_16s_max_star_horizontal_aligned16.cc   |  79 --------
 volk/lib/qa_16s_max_star_horizontal_aligned16.h    |  18 --
 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc     |  89 ---------
 volk/lib/qa_16sc_deinterleave_16s_aligned16.h      |  18 --
 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc     |  75 --------
 volk/lib/qa_16sc_deinterleave_32f_aligned16.h      |  18 --
 .../lib/qa_16sc_deinterleave_real_16s_aligned16.cc |  72 -------
 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h |  18 --
 .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 124 ------------
 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h |  18 --
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc |  70 -------
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h  |  18 --
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc        |  81 --------
 volk/lib/qa_16sc_magnitude_16s_aligned16.h         |  18 --
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc        | 131 -------------
 volk/lib/qa_16sc_magnitude_32f_aligned16.h         |  18 --
 volk/lib/qa_16u_byteswap_aligned16.cc              |  71 -------
 volk/lib/qa_16u_byteswap_aligned16.h               |  18 --
 volk/lib/qa_32f_accumulator_aligned16.cc           |  57 ------
 volk/lib/qa_32f_accumulator_aligned16.h            |  18 --
 volk/lib/qa_32f_add_aligned16.cc                   | 123 ------------
 volk/lib/qa_32f_add_aligned16.h                    |  18 --
 .../qa_32f_calc_spectral_noise_floor_aligned16.cc  |  60 ------
 .../qa_32f_calc_spectral_noise_floor_aligned16.h   |  18 --
 volk/lib/qa_32f_convert_16s_aligned16.cc           |  71 -------
 volk/lib/qa_32f_convert_16s_aligned16.h            |  18 --
 volk/lib/qa_32f_convert_16s_unaligned16.cc         |  71 -------
 volk/lib/qa_32f_convert_16s_unaligned16.h          |  18 --
 volk/lib/qa_32f_convert_32s_aligned16.cc           |  71 -------
 volk/lib/qa_32f_convert_32s_aligned16.h            |  18 --
 volk/lib/qa_32f_convert_32s_unaligned16.cc         |  71 -------
 volk/lib/qa_32f_convert_32s_unaligned16.h          |  18 --
 volk/lib/qa_32f_convert_64f_aligned16.cc           |  61 ------
 volk/lib/qa_32f_convert_64f_aligned16.h            |  18 --
 volk/lib/qa_32f_convert_64f_unaligned16.cc         |  61 ------
 volk/lib/qa_32f_convert_64f_unaligned16.h          |  18 --
 volk/lib/qa_32f_convert_8s_aligned16.cc            |  71 -------
 volk/lib/qa_32f_convert_8s_aligned16.h             |  18 --
 volk/lib/qa_32f_convert_8s_unaligned16.cc          |  71 -------
 volk/lib/qa_32f_convert_8s_unaligned16.h           |  18 --
 volk/lib/qa_32f_divide_aligned16.cc                | 133 -------------
 volk/lib/qa_32f_divide_aligned16.h                 |  18 --
 volk/lib/qa_32f_dot_prod_aligned16.cc              | 183 ------------------
 volk/lib/qa_32f_dot_prod_aligned16.h               |  18 --
 volk/lib/qa_32f_dot_prod_unaligned16.cc            | 190 ------------------
 volk/lib/qa_32f_dot_prod_unaligned16.h             |  18 --
 volk/lib/qa_32f_interleave_16sc_aligned16.cc       |  76 --------
 volk/lib/qa_32f_interleave_16sc_aligned16.h        |  18 --
 volk/lib/qa_32f_interleave_32fc_aligned16.cc       |  63 ------
 volk/lib/qa_32f_interleave_32fc_aligned16.h        |  18 --
 volk/lib/qa_32f_max_aligned16.cc                   |  70 -------
 volk/lib/qa_32f_max_aligned16.h                    |  18 --
 volk/lib/qa_32f_min_aligned16.cc                   |  70 -------
 volk/lib/qa_32f_min_aligned16.h                    |  18 --
 volk/lib/qa_32f_multiply_aligned16.cc              | 123 ------------
 volk/lib/qa_32f_multiply_aligned16.h               |  18 --
 volk/lib/qa_32f_normalize_aligned16.cc             |  79 --------
 volk/lib/qa_32f_normalize_aligned16.h              |  18 --
 volk/lib/qa_32f_power_aligned16.cc                 |  95 ---------
 volk/lib/qa_32f_power_aligned16.h                  |  18 --
 volk/lib/qa_32f_sqrt_aligned16.cc                  | 128 ------------
 volk/lib/qa_32f_sqrt_aligned16.h                   |  18 --
 volk/lib/qa_32f_stddev_aligned16.cc                |  75 --------
 volk/lib/qa_32f_stddev_aligned16.h                 |  18 --
 volk/lib/qa_32f_stddev_and_mean_aligned16.cc       |  76 --------
 volk/lib/qa_32f_stddev_and_mean_aligned16.h        |  18 --
 volk/lib/qa_32f_subtract_aligned16.cc              |  70 -------
 volk/lib/qa_32f_subtract_aligned16.h               |  18 --
 volk/lib/qa_32f_sum_of_poly_aligned16.cc           | 142 --------------
 volk/lib/qa_32f_sum_of_poly_aligned16.h            |  18 --
 volk/lib/qa_32fc_32f_multiply_aligned16.cc         |  75 --------
 volk/lib/qa_32fc_32f_multiply_aligned16.h          |  18 --
 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc       |  83 --------
 volk/lib/qa_32fc_32f_power_32fc_aligned16.h        |  18 --
 volk/lib/qa_32fc_atan2_32f_aligned16.cc            |  76 --------
 volk/lib/qa_32fc_atan2_32f_aligned16.h             |  18 --
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc   | 138 -------------
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h    |  18 --
 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc     |  64 ------
 volk/lib/qa_32fc_deinterleave_32f_aligned16.h      |  18 --
 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc     |  64 ------
 volk/lib/qa_32fc_deinterleave_64f_aligned16.h      |  18 --
 .../lib/qa_32fc_deinterleave_real_16s_aligned16.cc |  61 ------
 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h |  18 --
 .../lib/qa_32fc_deinterleave_real_32f_aligned16.cc |  61 ------
 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h |  18 --
 .../lib/qa_32fc_deinterleave_real_64f_aligned16.cc |  61 ------
 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h |  18 --
 volk/lib/qa_32fc_dot_prod_aligned16.cc             | 214 ---------------------
 volk/lib/qa_32fc_dot_prod_aligned16.h              |  20 --
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc        |  80 --------
 volk/lib/qa_32fc_magnitude_16s_aligned16.h         |  18 --
 volk/lib/qa_32fc_magnitude_32f_aligned16.cc        |  80 --------
 volk/lib/qa_32fc_magnitude_32f_aligned16.h         |  18 --
 volk/lib/qa_32fc_multiply_aligned16.cc             |  98 ----------
 volk/lib/qa_32fc_multiply_aligned16.h              |  18 --
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc   |  64 ------
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h    |  18 --
 volk/lib/qa_32fc_square_dist_aligned16.cc          |  91 ---------
 volk/lib/qa_32fc_square_dist_aligned16.h           |  18 --
 .../qa_32fc_square_dist_scalar_mult_aligned16.cc   |  96 ---------
 .../qa_32fc_square_dist_scalar_mult_aligned16.h    |  18 --
 volk/lib/qa_32s_and_aligned16.cc                   |  70 -------
 volk/lib/qa_32s_and_aligned16.h                    |  18 --
 volk/lib/qa_32s_convert_32f_aligned16.cc           |  61 ------
 volk/lib/qa_32s_convert_32f_aligned16.h            |  18 --
 volk/lib/qa_32s_convert_32f_unaligned16.cc         |  61 ------
 volk/lib/qa_32s_convert_32f_unaligned16.h          |  18 --
 volk/lib/qa_32s_or_aligned16.cc                    |  70 -------
 volk/lib/qa_32s_or_aligned16.h                     |  18 --
 volk/lib/qa_32u_byteswap_aligned16.cc              |  60 ------
 volk/lib/qa_32u_byteswap_aligned16.h               |  18 --
 volk/lib/qa_64f_convert_32f_aligned16.cc           |  61 ------
 volk/lib/qa_64f_convert_32f_aligned16.h            |  18 --
 volk/lib/qa_64f_convert_32f_unaligned16.cc         |  61 ------
 volk/lib/qa_64f_convert_32f_unaligned16.h          |  18 --
 volk/lib/qa_64f_max_aligned16.cc                   |  61 ------
 volk/lib/qa_64f_max_aligned16.h                    |  18 --
 volk/lib/qa_64f_min_aligned16.cc                   |  61 ------
 volk/lib/qa_64f_min_aligned16.h                    |  18 --
 volk/lib/qa_64u_byteswap_aligned16.cc              |  60 ------
 volk/lib/qa_64u_byteswap_aligned16.h               |  18 --
 volk/lib/qa_8s_convert_16s_aligned16.cc            |  64 ------
 volk/lib/qa_8s_convert_16s_aligned16.h             |  18 --
 volk/lib/qa_8s_convert_16s_unaligned16.cc          |  64 ------
 volk/lib/qa_8s_convert_16s_unaligned16.h           |  18 --
 volk/lib/qa_8s_convert_32f_aligned16.cc            |  72 -------
 volk/lib/qa_8s_convert_32f_aligned16.h             |  18 --
 volk/lib/qa_8s_convert_32f_unaligned16.cc          |  64 ------
 volk/lib/qa_8s_convert_32f_unaligned16.h           |  18 --
 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc      |  68 -------
 volk/lib/qa_8sc_deinterleave_16s_aligned16.h       |  18 --
 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc      | 135 -------------
 volk/lib/qa_8sc_deinterleave_32f_aligned16.h       |  18 --
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc |  65 -------
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h  |  18 --
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 139 -------------
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h  |  18 --
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc  |  61 ------
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h   |  18 --
 .../qa_8sc_multiply_conjugate_16sc_aligned16.cc    |  87 ---------
 .../lib/qa_8sc_multiply_conjugate_16sc_aligned16.h |  18 --
 .../qa_8sc_multiply_conjugate_32fc_aligned16.cc    |  87 ---------
 .../lib/qa_8sc_multiply_conjugate_32fc_aligned16.h |  18 --
 volk/lib/qa_volk.cc                                | 211 --------------------
 volk/lib/qa_volk.h                                 |  36 ----
 volk/lib/test_all.cc                               |  82 --------
 158 files changed, 8144 deletions(-)
 delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.h
 delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.h
 delete mode 100644 volk/lib/qa_16s_max_star_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_max_star_aligned16.h
 delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.cc
 delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.h
 delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.h
 delete mode 100644 volk/lib/qa_32f_add_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_add_aligned16.h
 delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_8s_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_divide_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_divide_aligned16.h
 delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.h
 delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.h
 delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.h
 delete mode 100644 volk/lib/qa_32f_max_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_max_aligned16.h
 delete mode 100644 volk/lib/qa_32f_min_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_min_aligned16.h
 delete mode 100644 volk/lib/qa_32f_multiply_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_multiply_aligned16.h
 delete mode 100644 volk/lib/qa_32f_normalize_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_normalize_aligned16.h
 delete mode 100644 volk/lib/qa_32f_power_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_power_aligned16.h
 delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.h
 delete mode 100644 volk/lib/qa_32f_stddev_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_stddev_aligned16.h
 delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.h
 delete mode 100644 volk/lib/qa_32f_subtract_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_subtract_aligned16.h
 delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
 delete mode 100644 volk/lib/qa_32s_and_aligned16.cc
 delete mode 100644 volk/lib/qa_32s_and_aligned16.h
 delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_32s_or_aligned16.cc
 delete mode 100644 volk/lib/qa_32s_or_aligned16.h
 delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.cc
 delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.h
 delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_64f_max_aligned16.cc
 delete mode 100644 volk/lib/qa_64f_max_aligned16.h
 delete mode 100644 volk/lib/qa_64f_min_aligned16.cc
 delete mode 100644 volk/lib/qa_64f_min_aligned16.h
 delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.cc
 delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
 delete mode 100644 volk/lib/qa_volk.cc
 delete mode 100644 volk/lib/qa_volk.h
 delete mode 100644 volk/lib/test_all.cc

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 63df85244..bbc993fa2 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -110,7 +110,6 @@ endif
 # ----------------------------------------------------------------
 noinst_HEADERS = \
 	volk_init.h \
-	qa_volk.h \
 	qa_utils.h \
 	assembly.h
 
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc
deleted file mode 100644
index 6215f4a64..000000000
--- a/volk/lib/qa_16s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <volk/volk_16s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE
-
-void qa_16s_convert_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h
deleted file mode 100644
index ef813d96f..000000000
--- a/volk/lib/qa_16s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc
deleted file mode 100644
index 46c2e48ac..000000000
--- a/volk/lib/qa_16s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <volk/volk_16s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE
-
-void qa_16s_convert_32f_unaligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_32f_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h
deleted file mode 100644
index aeb04f770..000000000
--- a/volk/lib/qa_16s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc
deleted file mode 100644
index 8225aa0cf..000000000
--- a/volk/lib/qa_16s_convert_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <volk/volk_16s_convert_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_convert_8s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h
deleted file mode 100644
index 2e409d0cc..000000000
--- a/volk/lib/qa_16s_convert_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc
deleted file mode 100644
index e6ce5030e..000000000
--- a/volk/lib/qa_16s_convert_8s_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <volk/volk_16s_convert_8s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_convert_8s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_8s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_8s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h
deleted file mode 100644
index 4b2fe9e42..000000000
--- a/volk/lib/qa_16s_convert_8s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc
deleted file mode 100644
index c6f828ba6..000000000
--- a/volk/lib/qa_16s_max_star_aligned16.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <volk/volk_16s_max_star_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_max_star_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-
-
-void qa_16s_max_star_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 6400;
-  const int ITERS = 100000;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short output0[1] __attribute__ ((aligned (16)));
-
-  short output1[1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-
-    short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    
-    input0[i] = plus0 - minus0;
-    
-  }
-  printf("16s_max_star_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < 1; ++i) {
-    
-    CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h
deleted file mode 100644
index 119f87c4d..000000000
--- a/volk/lib/qa_16s_max_star_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
-#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_max_star_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
deleted file mode 100644
index 0a58570e2..000000000
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <volk/volk_16s_max_star_horizontal_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_max_star_horizontal_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_16s_max_star_horizontal_aligned16::t1() {
-
-  
-  volk_runtime_init();
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 32;
-  const int ITERS = 1;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short output0[vlen>>1] __attribute__ ((aligned (16)));
-
-  short output1[vlen>>1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2)));
-    
-    short minus0 = ((short) (rand() - (RAND_MAX/2)));
-    
-    input0[i] = plus0 - minus0;
-    
-  }
-  printf("16s_max_star_horizontal_aligned\n");
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic");
-    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic");
-    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen);
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
-    /*    volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3");
-    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");
-    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-  
-  for(int i = 0; i < (vlen >> 1); ++i) {
-    //    printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-    
-  }
-  for(int i = 0; i < (vlen >> 1); ++i) {
-      
-      CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-    }
-	}
-   
-  
-#endif
-	
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h
deleted file mode 100644
index 9f9757253..000000000
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
-#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
deleted file mode 100644
index aadc39067..000000000
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_16s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0));
-  }
-  printf("16sc_deinterleave_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse2[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse21[i]);
-
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_ssse3[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_ssse31[i]);
-    
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_orc[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_orc1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
deleted file mode 100644
index 995ab5b34..000000000
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 13151be13..000000000
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <volk/volk_16sc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16sc_deinterleave_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-  float output_sse21[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_orc1[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_sse21[i], fabs(output_generic1[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_orc1[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
deleted file mode 100644
index fea3b6c2d..000000000
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index c67064ea6..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_real_16s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0));
-  }
-  printf("16sc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    // printf("%d = generic... %d, sse2... %d, ssse3... %d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index ebb70b97a..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index f86f03b88..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* SSE */
-
-#else
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
-  }
-  printf("16sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* SSE4_1 */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index e83426473..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
deleted file mode 100644
index 803caaa2d..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_real_8s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-  int8_t output_orc[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
-  }
-  printf("16sc_deinterleave_real_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
deleted file mode 100644
index 04e5511e5..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
deleted file mode 100644
index 7fbdd8620..000000000
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <volk/volk_16sc_magnitude_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_16sc_magnitude_16s_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_magnitude_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_magnitude_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h
deleted file mode 100644
index 4664b70f4..000000000
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
deleted file mode 100644
index 54cc2ba6e..000000000
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <volk/volk_16sc_magnitude_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_16sc_magnitude_32f_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* inputLoad = (int16_t*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2));
-  }
-  printf("16sc_magnitude_32f_aligned\n");
-
-  float scale = 32768.0;
-  for(int i = 0; i < vlen; ++i) {   
-    float re = (float)(input0[i].real())/scale;
-    float im = (float)(input0[i].imag())/scale;
-    output_known[i] = sqrt(re*re + im*im);
-  }
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 100; ++i) {
-    printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag());
-    printf("generic... %f == %f\n", output_generic[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#else
-
-void qa_16sc_magnitude_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* inputLoad = (int16_t*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("16sc_magnitude_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-/*  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-*/
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-//    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h
deleted file mode 100644
index 0c25673ea..000000000
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
deleted file mode 100644
index c2295968b..000000000
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <volk/volk_16u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint16_t output0[vlen] __attribute__ ((aligned (16)));
-  uint16_t output01[vlen] __attribute__ ((aligned (16)));
-  uint16_t output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint16_t));
-  memcpy(output02, output0, vlen*sizeof(uint16_t));
-
-  printf("16u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output02, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);    
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h
deleted file mode 100644
index e11b23e3f..000000000
--- a/volk/lib/qa_16u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc
deleted file mode 100644
index 0defef283..000000000
--- a/volk/lib/qa_32f_accumulator_aligned16.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <volk/volk_32f_accumulator_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_accumulator_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_accumulator_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float accumulator_generic;
-  float accumulator_sse;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_accumulator_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  //printf("%d...%d\n", output0[i], output01[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4);
-}
-
-#endif
diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h
deleted file mode 100644
index 0004d3ff0..000000000
--- a/volk/lib/qa_32f_accumulator_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
-#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_accumulator_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
deleted file mode 100644
index a183d4d85..000000000
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_add_aligned16.h>
-#include <volk/volk_32f_add_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_add_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] + input1[i];
-  }
-  printf("32f_add_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_add_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_add_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h
deleted file mode 100644
index 58e2a151c..000000000
--- a/volk/lib/qa_32f_add_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H
-#define INCLUDED_QA_32F_ADD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_add_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_add_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
deleted file mode 100644
index 5d6987333..000000000
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h>
-#include <cstdlib>
-#include <math.h>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[1] __attribute__ ((aligned (16)));
-  float output01[1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_calc_spectral_noise_floor_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < 1; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
deleted file mode 100644
index c5dce2c4b..000000000
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc
deleted file mode 100644
index 3e2452e68..000000000
--- a/volk/lib/qa_32f_convert_16s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <volk/volk_32f_convert_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_16s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("%d generic... %d, sse... %d sse2... %d\n", i, output_generic[i], output_sse[i], output_sse2[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h
deleted file mode 100644
index fce1eb417..000000000
--- a/volk/lib/qa_32f_convert_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc
deleted file mode 100644
index e016b7ff7..000000000
--- a/volk/lib/qa_32f_convert_16s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <volk/volk_32f_convert_16s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_16s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_16s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_16s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h
deleted file mode 100644
index 492bc80e6..000000000
--- a/volk/lib/qa_32f_convert_16s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc
deleted file mode 100644
index abceb52fb..000000000
--- a/volk/lib/qa_32f_convert_32s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <volk/volk_32f_convert_32s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_32s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_32s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_32s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h
deleted file mode 100644
index 97d854463..000000000
--- a/volk/lib/qa_32f_convert_32s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc
deleted file mode 100644
index 90f84b56f..000000000
--- a/volk/lib/qa_32f_convert_32s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <volk/volk_32f_convert_32s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_32s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_32s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_32s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h
deleted file mode 100644
index 5d662d86d..000000000
--- a/volk/lib/qa_32f_convert_32s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc
deleted file mode 100644
index 1d0754ac9..000000000
--- a/volk/lib/qa_32f_convert_64f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <volk/volk_32f_convert_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h
deleted file mode 100644
index 41eb3e094..000000000
--- a/volk/lib/qa_32f_convert_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc
deleted file mode 100644
index 6f7d5066d..000000000
--- a/volk/lib/qa_32f_convert_64f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <volk/volk_32f_convert_64f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_64f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_64f_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_64f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h
deleted file mode 100644
index 4b144f033..000000000
--- a/volk/lib/qa_32f_convert_64f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc
deleted file mode 100644
index 6a53629b5..000000000
--- a/volk/lib/qa_32f_convert_8s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <volk/volk_32f_convert_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_8s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h
deleted file mode 100644
index 68a523f34..000000000
--- a/volk/lib/qa_32f_convert_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc
deleted file mode 100644
index fbc5c20e6..000000000
--- a/volk/lib/qa_32f_convert_8s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <volk/volk_32f_convert_8s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_8s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_8s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_8s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h
deleted file mode 100644
index 88d4ff42a..000000000
--- a/volk/lib/qa_32f_convert_8s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
deleted file mode 100644
index f2a1b9e7f..000000000
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_divide_aligned16.h>
-#include <volk/volk_32f_divide_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_divide_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output1[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] / input1[i];
-  }
-  printf("32f_divide_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-    CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_divide_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_divide_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h
deleted file mode 100644
index 79d5ae4b8..000000000
--- a/volk/lib/qa_32f_divide_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
-#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_divide_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc
deleted file mode 100644
index 98c1f2d99..000000000
--- a/volk/lib/qa_32f_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,183 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifdef LV_HAVE_SSE3
-void qa_32f_dot_prod_aligned16::t1() {
-  const int vlen = 2046;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  
-  printf("32f_dot_prod_aligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
-
-  for(i = 0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  
-}
-#else
-void qa_32f_dot_prod_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-#else
-
-void qa_32f_dot_prod_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 4095;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-  float * result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  printf("32f_dot_prod_aligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  //printf("generic: %f ... sse: %f  ... sse3 %f  ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]);
-  for(i =0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h
deleted file mode 100644
index 6931a9e98..000000000
--- a/volk/lib/qa_32f_dot_prod_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc
deleted file mode 100644
index 8e97d4249..000000000
--- a/volk/lib/qa_32f_dot_prod_unaligned16.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifdef LV_HAVE_SSE3
-void qa_32f_dot_prod_unaligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  
-  printf("32f_dot_prod_unaligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
-
-  for(i = 0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  
-}
-#else
-void qa_32f_dot_prod_unaligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-#else
-
-void qa_32f_dot_prod_unaligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 4095;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-  float * result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
-
-  input = &input[1]; // Make sure the buffer is unaligned
-  taps = &taps[1]; // Make sure the buffer is unaligned
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  printf("32f_dot_prod_unaligned16\n");
-  
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  //printf("generic: %f ... sse: %f  ... sse3 %f  ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]);
-  for(i =0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(&input[-1]);
-  free(&taps[-1]);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h
deleted file mode 100644
index e8bad07fe..000000000
--- a/volk/lib/qa_32f_dot_prod_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
-#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
deleted file mode 100644
index a7ae60780..000000000
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <volk/volk_32f_interleave_16sc_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_interleave_16sc_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_interleave_16sc_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  std::complex<int16_t> output_generic[vlen] __attribute__ ((aligned (16)));
-  std::complex<int16_t> output_sse[vlen] __attribute__ ((aligned (16)));
-  std::complex<int16_t> output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-  }
-  printf("32f_interleave_16sc_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01);
-
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h
deleted file mode 100644
index 8d2914817..000000000
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
-#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
deleted file mode 100644
index 333b6fce8..000000000
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <volk/volk_32f_interleave_32fc_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_interleave_32fc_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_interleave_32fc_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  std::complex<float> output_generic[vlen] __attribute__ ((aligned (16)));
-  std::complex<float> output_sse[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-  }
-  printf("32f_interleave_32fc_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h
deleted file mode 100644
index cba518d37..000000000
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
-#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
deleted file mode 100644
index 98f8ce9bc..000000000
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_max_aligned16.h>
-#include <volk/volk_32f_max_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_max_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_max_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_max_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h
deleted file mode 100644
index d535479f4..000000000
--- a/volk/lib/qa_32f_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H
-#define INCLUDED_QA_32F_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
deleted file mode 100644
index 798b47c53..000000000
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_min_aligned16.h>
-#include <volk/volk_32f_min_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_min_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_min_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_min_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h
deleted file mode 100644
index 90961ac92..000000000
--- a/volk/lib/qa_32f_min_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H
-#define INCLUDED_QA_32F_MIN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_min_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_min_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
deleted file mode 100644
index aa17cd62e..000000000
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <volk/volk_32f_multiply_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_multiply_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] * input1[i];
-  }
-  printf("32f_multiply_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_multiply_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_multiply_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h
deleted file mode 100644
index 7032a2ad4..000000000
--- a/volk/lib/qa_32f_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
deleted file mode 100644
index 0da43ecff..000000000
--- a/volk/lib/qa_32f_normalize_aligned16.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <volk/volk_32f_normalize_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_normalize_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_normalize_aligned16::t1() {
-  
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 320001;
-  const int ITERS = 100;
-
-  float* output0;
-  float* output01;
-  float* output02;
-  ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float));
-  ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float));
-  ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(float));
-  memcpy(output02, output0, vlen*sizeof(float));
-  printf("32f_normalize_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    // printf("%e...%e\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4);
-  }
-
-  free(output0);
-  free(output01);
-  free(output02);
-}
-
-#endif
diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h
deleted file mode 100644
index 7c421eb82..000000000
--- a/volk/lib/qa_32f_normalize_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
-#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_normalize_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc
deleted file mode 100644
index 1b331daeb..000000000
--- a/volk/lib/qa_32f_power_aligned16.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_power_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE
-void qa_32f_power_aligned16::t1() {
-
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 10000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float* input;
-  int i;
-  
-  float* result_generic;
-  float* result_sse;
-  float* result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, vlen *  sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float));
-
-  random_floats((float*)input, vlen);
-
-  const float power = 3;
-  
-  printf("32f_power_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_power_aligned16_manual(result_generic, input, power, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_power_aligned16_manual(result_sse, input, power, vlen,  "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA);
-  }
-
-  free(input);
-  free(result_generic);
-  free(result_sse);
-  
-}
-#else
-void qa_32f_power_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE */
-
diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h
deleted file mode 100644
index d45df4e56..000000000
--- a/volk/lib/qa_32f_power_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H
-#define INCLUDED_QA_32F_POWER_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_power_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_power_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
deleted file mode 100644
index c216ce5d5..000000000
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <volk/volk_32f_sqrt_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_sqrt_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  // No reason to test negative numbers because they result in NaN.
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
-    output_known[i] = sqrt(input0[i]);
-  }
-  printf("32f_sqrt_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f\n", input0[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#else
-
-void qa_32f_sqrt_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  // No reason to test negative numbers because they result in NaN.
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
-  }
-  printf("32f_sqrt_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h
deleted file mode 100644
index e4b99d981..000000000
--- a/volk/lib/qa_32f_sqrt_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H
-#define INCLUDED_QA_32F_SQRT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_sqrt_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc
deleted file mode 100644
index 5934d70df..000000000
--- a/volk/lib/qa_32f_stddev_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <volk/volk_32f_stddev_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_stddev_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_stddev_aligned16::t1() {
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-
-  float stddev_generic;
-  float stddev_sse;
-  float stddev_sse4_1;
-  float mean = 0;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    mean += input0[i];
-  }
-  mean /= static_cast<float>(vlen);
-
-  printf("32f_stddev_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  //printf("%d...%d\n", output0[i], output01[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
-
-}
-
-#endif
diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h
deleted file mode 100644
index 7f8d7a5fc..000000000
--- a/volk/lib/qa_32f_stddev_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H
-#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_stddev_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
deleted file mode 100644
index 78c701d78..000000000
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-#include <volk/volk_32f_stddev_and_mean_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_stddev_and_mean_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_stddev_and_mean_aligned16::t1() {
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float stddev_generic;
-  float stddev_sse;
-  float stddev_sse4_1;
-  float mean_generic;
-  float mean_sse;
-  float mean_sse4_1;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_stddev_and_mean_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4);
-
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4);
-
-}
-
-#endif
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h
deleted file mode 100644
index e08bd249a..000000000
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
-#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
deleted file mode 100644
index 1e2210203..000000000
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <volk/volk_32f_subtract_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_subtract_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_subtract_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_subtract_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h
deleted file mode 100644
index 97c14f129..000000000
--- a/volk/lib/qa_32f_subtract_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
-#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_subtract_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc
deleted file mode 100644
index 494776357..000000000
--- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <math.h>
-
-#define SNR 30.0
-#define CENTER -4.0
-#define CUTOFF -5.595
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 100000
-#define VEC_LEN 64
-static float uniform() {
-  return ((float) rand() / RAND_MAX);	// uniformly (0, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] =  uniform () * -SNR/2.0;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32f_sum_of_poly_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32f_sum_of_poly_aligned16::t1(){
-  int i = 0;
-  
-  volk_environment_init();
-  int ret;
-
-  const int vlen = VEC_LEN;
-  float cutoff = CUTOFF;
-  
-  float* center_point_array;
-  float* target;
-  float* target_generic;
-  float* src0 ;
-
-
-  ret = posix_memalign((void**)&center_point_array, 16, 24);
-  ret = posix_memalign((void**)&target, 16, 4);
-  ret = posix_memalign((void**)&target_generic, 16, 4);
-  ret = posix_memalign((void**)&src0, 16, (vlen << 2));
-  
- 
-  random_floats((float*)src0, vlen);
- 
-  float a = (float)CENTER;
-  float etoa = expf(a);
-  center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 +
-			   (-4.0 * a * a * a)/24.0 + 
-			   (3.0 * a * a)/6.0 +
-			   (-2.0 * a)/2.0 +
-			   (1.0)) * etoa;
-  center_point_array[1] = (//(-10.0 * a * a * a)/120.0 +
-			   (6.0 * a * a)/24.0 + 
-			   (-3.0 * a)/6.0 +
-			   (1.0/2.0)) * etoa;
-  center_point_array[2] = (//(10.0 * a * a)/120.0 +
-			   (-4.0 * a)/24.0 +
-			   (1.0/6.0)) * etoa;
-  center_point_array[3] = (//(-5.0 * a)/120.0 +
-			   (1.0/24.0)) * etoa;
-  //center_point_array[4] = ((1.0)/120.0) * etoa;
-  center_point_array[4] = (//(a * a * a * a * a)/120.0 +
-			   (a * a * a * a)/24.0 +
-			   (a * a * a)/-6.0 +
-			   (a * a)/2.0 +
-			   -a + 1.0) * etoa;
-  
-  printf("32f_sum_of_poly_aligned16\n");
-
-  clock_t start, end;
-  double total;
-  
-  float my_sum = 0.0;
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    float sum = 0.0;
-    for(int l = 0; l < vlen; ++l) {
-      
-      sum += expf(src0[l]);
-      
-    }
-    my_sum = sum;
-  }
-  
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("exp time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    
-    volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic");
-  
-  }
-  
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32f_sum_of_poly_aligned16_manual(target, src0, center_point_array, &cutoff, vlen << 2, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 approx time: %f\n", total);
-
-
-  
-  printf("exp: %f, sse3: %f\n", my_sum, target[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA);
-  
-
-  free(center_point_array);
-  free(target);
-  free(target_generic);
-  free(src0);
-
-  
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h
deleted file mode 100644
index 67a347f9a..000000000
--- a/volk/lib/qa_32f_sum_of_poly_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
-#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
deleted file mode 100644
index b80e0e008..000000000
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <stdlib.h>
-#include <time.h>
-#include <string.h>
-#include <qa_utils.h>
-#include <boost/test/unit_test.hpp>
-
-#define	TOLERANCE	(1e-4)
-
-void qa_32fc_32f_multiply_aligned16(void) {
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  float * taps;
-  int i;
-  std::vector<std::string> archs;
-  archs.push_back("generic");
-#ifdef LV_HAVE_SSE3
-  archs.push_back("sse3");
-#endif
-#ifdef LV_HAVE_ORC
-  archs.push_back("orc");
-#endif
-  
-  std::vector<std::complex<float>* > results;
-
-  ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  
-  for(i=0; i < archs.size(); i++) {
-      std::complex<float> *ptr;
-      ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float));
-      if(ret) {
-          printf("Couldn't allocate memory\n");
-          exit(1);
-      }
-      results.push_back(ptr);
-  }
-
-  random_floats((float*)input, vlen * 2);
-  random_floats(taps, vlen);
-  
-  printf("32fc_32f_multiply_aligned16\n");
-
-  for(i=0; i < archs.size(); i++) {
-    start = clock();
-    for(int count = 0; count < ITERS; ++count) {
-      volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str());
-    }
-    end = clock();
-    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-    printf("%s_time: %f\n", archs[i].c_str(), total);
-  }
-
-  for(i=0; i < vlen; i++) {
-      int j = 1;
-      for(j; j < archs.size(); j++) {
-          assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA);
-      }
-  }
-
-  free(input);
-  free(taps);
-  for(i=0; i < archs.size(); i++) {      
-    free(results[i]);
-  }
-}
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h
deleted file mode 100644
index fc3b3eeb2..000000000
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
deleted file mode 100644
index 64ea65da9..000000000
--- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1.5e-3)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE
-void qa_32fc_32f_power_32fc_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 10000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  int i;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse;
-
-  ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float));
-
-  random_floats((float*)input, vlen * 2);
-
-  const float power = 3.2;
-  
-  printf("32fc_32f_power_32fc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen,  "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(result_generic);
-  free(result_sse);
-  
-}
-#else
-void qa_32fc_32f_power_32fc_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE */
-
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
deleted file mode 100644
index 464b7b7cc..000000000
--- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
-#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
deleted file mode 100644
index c55ab5aa0..000000000
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <volk/volk_32fc_atan2_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_atan2_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_atan2_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_atan2_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h
deleted file mode 100644
index 9c4dc209a..000000000
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
deleted file mode 100644
index 2f9a30395..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse");
-
-  printf("32fc_conjugate_dot_prod_aligned16\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#elif LV_HAVE_SSE && LV_HAVE_32
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32");
-
-  printf("32fc_conjugate_dot_prod_aligned16\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#else
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
deleted file mode 100644
index 507b1769b..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 72e084c05..000000000
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
deleted file mode 100644
index 78660e6ad..000000000
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
deleted file mode 100644
index 89770c236..000000000
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32fc_deinterleave_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_generic1[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-  double output_sse21[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
deleted file mode 100644
index f924b9752..000000000
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index 7472476f7..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_real_16s_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index 68b80f27d..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index 5cbdc49b3..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index 765450bb6..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
deleted file mode 100644
index 4147e30ae..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32fc_deinterleave_real_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
deleted file mode 100644
index 3e55fb812..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc
deleted file mode 100644
index bcf9ea954..000000000
--- a/volk/lib/qa_32fc_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#include <stdio.h>
-
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-
-
-#if LV_HAVE_SSE3
-void qa_32fc_dot_prod_aligned16::t1() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_32
-void qa_32fc_dot_prod_aligned16::t2() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_32_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t2() {
-  printf("sse_32 not available... no test performed\n");
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-void qa_32fc_dot_prod_aligned16::t3() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_64_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t3() {
-  printf("sse_64 not available... no test performed\n");
-}
-
-
-
-#endif 
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h
deleted file mode 100644
index 4b360db27..000000000
--- a/volk/lib/qa_32fc_dot_prod_aligned16.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-  void t2 ();
-  void t3 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
deleted file mode 100644
index c718b6b71..000000000
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <volk/volk_32fc_magnitude_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_magnitude_16s_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_magnitude_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_magnitude_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
-  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h
deleted file mode 100644
index ffdf1dd9e..000000000
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
-#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
deleted file mode 100644
index 1d475fb86..000000000
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <volk/volk_32fc_magnitude_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_magnitude_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_magnitude_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_magnitude_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h
deleted file mode 100644
index a2881308c..000000000
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc
deleted file mode 100644
index 022b58ad6..000000000
--- a/volk/lib/qa_32fc_multiply_aligned16.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-3)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE3
-void qa_32fc_multiply_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  int i;
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-  std::complex<float>* result_orc;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float));
-  
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_multiply_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
-    assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  free(result_orc);
-  
-}
-#else
-void qa_32fc_multiply_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h
deleted file mode 100644
index c8abaa8fe..000000000
--- a/volk/lib/qa_32fc_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
deleted file mode 100644
index 1444c78a9..000000000
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <volk/volk_32fc_power_spectrum_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse3
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_power_spectrum_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_power_spectrum_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  const float scalar = vlen;
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-
-  printf("32fc_power_spectrum_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse33... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
deleted file mode 100644
index d991223f3..000000000
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc
deleted file mode 100644
index d9ead8495..000000000
--- a/volk/lib/qa_32fc_square_dist_aligned16.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 10000000
-#define VEC_LEN 64
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_square_dist_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_square_dist_aligned16::t1(){
-  int i = 0;
-  
-  const int vlen = VEC_LEN;
-  volk_environment_init();
-  int ret;
-  
-  float* target;
-  float* target_generic;
-  std::complex<float>* src0 ;
-  std::complex<float>* points;
-
-  ret = posix_memalign((void**)&points, 16, vlen << 3);
-  ret = posix_memalign((void**)&target, 16, vlen << 2);
-  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
-  ret = posix_memalign((void**)&src0, 16, 8);
-  
-  random_floats((float*)points, vlen * 2);
-  random_floats((float*)src0, 2);
-  
-  printf("32fc_square_dist_aligned16\n");
-  
-  clock_t start, end;
-  double total;
-  
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic");
-  }
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-  volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-  
-  
-  for(; i < vlen; ++i) {
-    //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA);
-  }
-
-  free(target);
-  free(target_generic);
-  free(points);
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h
deleted file mode 100644
index 9d365d8b0..000000000
--- a/volk/lib/qa_32fc_square_dist_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
-#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
deleted file mode 100644
index f923d1d5c..000000000
--- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define ERR_DELTA .0001
-#define NUM_ITERS 10000000
-#define VEC_LEN 64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
-  int i = 0;
-  
-  const int vlen = VEC_LEN;
-  
-  volk_environment_init();
-  int ret;
-  
-  float* target;
-  float* target_generic;
-  std::complex<float>* src0 ;
-  std::complex<float>* points;
-  float scalar;
-
-  ret = posix_memalign((void**)&points, 16, vlen << 3);
-  ret = posix_memalign((void**)&target, 16, vlen << 2);
-  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
-  ret = posix_memalign((void**)&src0, 16, 8);
-  
-  random_floats((float*)points, vlen * 2);
-  random_floats((float*)src0, 2);
-  random_floats(&scalar, 1);
-  
-  printf("32fc_square_dist_scalar_mult_aligned16\n");
-  
-  clock_t start, end;
-  double total;
-  
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic");
-  }
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 3, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-  
-  
-  for(i = 0; i < vlen; ++i) {
-    printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA);
-  }
-
-  free(target);
-  free(target_generic);
-  free(points);
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
deleted file mode 100644
index ac4e3c45b..000000000
--- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
deleted file mode 100644
index d20682147..000000000
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_and_aligned16.h>
-#include <volk/volk_32s_and_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32s_and_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_and_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  int32_t input1[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output0[vlen] __attribute__ ((aligned (16)));
-  int32_t output01[vlen] __attribute__ ((aligned (16)));
-  int32_t output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-  }
-  printf("32s_and_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h
deleted file mode 100644
index dfcb47c63..000000000
--- a/volk/lib/qa_32s_and_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H
-#define INCLUDED_QA_32S_AND_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_and_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_and_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc
deleted file mode 100644
index 07d799809..000000000
--- a/volk/lib/qa_32s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <volk/volk_32s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32s_convert_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_convert_32f_aligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("32s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h
deleted file mode 100644
index efd2a2eea..000000000
--- a/volk/lib/qa_32s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc
deleted file mode 100644
index 2ec610ffb..000000000
--- a/volk/lib/qa_32s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <volk/volk_32s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32s_convert_32f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_convert_32f_unaligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("32s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h
deleted file mode 100644
index 5006f5fd8..000000000
--- a/volk/lib/qa_32s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
deleted file mode 100644
index bebf779b0..000000000
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_or_aligned16.h>
-#include <volk/volk_32s_or_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32s_or_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_or_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  int32_t input1[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output0[vlen] __attribute__ ((aligned (16)));
-  int32_t output01[vlen] __attribute__ ((aligned (16)));
-  int32_t output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-  }
-  printf("32s_or_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h
deleted file mode 100644
index 9e949eb52..000000000
--- a/volk/lib/qa_32s_or_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H
-#define INCLUDED_QA_32S_OR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_or_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_or_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */
diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc
deleted file mode 100644
index 313c786b6..000000000
--- a/volk/lib/qa_32u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <volk/volk_32u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint32_t output0[vlen] __attribute__ ((aligned (16)));
-  uint32_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint32_t));
-  printf("32u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h
deleted file mode 100644
index 47bad4c3d..000000000
--- a/volk/lib/qa_32u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc
deleted file mode 100644
index 7f9c4584a..000000000
--- a/volk/lib/qa_64f_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <volk/volk_64f_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_convert_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_convert_32f_aligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  double input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h
deleted file mode 100644
index 95d79f73d..000000000
--- a/volk/lib/qa_64f_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc
deleted file mode 100644
index 98aadbf4d..000000000
--- a/volk/lib/qa_64f_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <volk/volk_64f_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_convert_32f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_convert_32f_unaligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  double input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h
deleted file mode 100644
index 430327e81..000000000
--- a/volk/lib/qa_64f_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc
deleted file mode 100644
index 76e755514..000000000
--- a/volk/lib/qa_64f_max_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_max_aligned16.h>
-#include <volk/volk_64f_max_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_max_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_max_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  double input0[vlen] __attribute__ ((aligned (16)));
-  double input1[vlen] __attribute__ ((aligned (16)));
-  
-  double output0[vlen] __attribute__ ((aligned (16)));
-  double output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_max_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h
deleted file mode 100644
index 7cbd4d4c1..000000000
--- a/volk/lib/qa_64f_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H
-#define INCLUDED_QA_64F_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc
deleted file mode 100644
index 4b70d2881..000000000
--- a/volk/lib/qa_64f_min_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_min_aligned16.h>
-#include <volk/volk_64f_min_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_min_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_min_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  double input0[vlen] __attribute__ ((aligned (16)));
-  double input1[vlen] __attribute__ ((aligned (16)));
-  
-  double output0[vlen] __attribute__ ((aligned (16)));
-  double output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_min_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h
deleted file mode 100644
index a0e95395f..000000000
--- a/volk/lib/qa_64f_min_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H
-#define INCLUDED_QA_64F_MIN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_min_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_min_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc
deleted file mode 100644
index 20d012c9e..000000000
--- a/volk/lib/qa_64u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <volk/volk_64u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint64_t output0[vlen] __attribute__ ((aligned (16)));
-  uint64_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint64_t));
-  printf("64u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h
deleted file mode 100644
index a4fa0c983..000000000
--- a/volk/lib/qa_64u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc
deleted file mode 100644
index 8dd5f76ca..000000000
--- a/volk/lib/qa_8s_convert_16s_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <volk/volk_8s_convert_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4_1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_16s_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_16s_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h
deleted file mode 100644
index 38739fc96..000000000
--- a/volk/lib/qa_8s_convert_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc
deleted file mode 100644
index 12c502d4b..000000000
--- a/volk/lib/qa_8s_convert_16s_unaligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <volk/volk_8s_convert_16s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4_1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_16s_unaligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_16s_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_16s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h
deleted file mode 100644
index d39fffc35..000000000
--- a/volk/lib/qa_8s_convert_16s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
deleted file mode 100644
index f27e60552..000000000
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <volk/volk_8s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4.1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_32f_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h
deleted file mode 100644
index 7f8401d42..000000000
--- a/volk/lib/qa_8s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc
deleted file mode 100644
index 43468b1b1..000000000
--- a/volk/lib/qa_8s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <volk/volk_8s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4.1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_32f_unaligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_32f_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen+1] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen+1] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen+1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%e...%e\n", output_generic[i], output_sse4_1[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h
deleted file mode 100644
index aad2f8c22..000000000
--- a/volk/lib/qa_8s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
deleted file mode 100644
index f753e1107..000000000
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_deinterleave_16s_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_16s_aligned16::t1() {
-
-  
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_11[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse4_11[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
deleted file mode 100644
index 9c99fed70..000000000
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 29073eed7..000000000
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <volk/volk_8sc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* LV_HAVE_SSE */
-
-#else
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-  float output_sse14_1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max<double>((output_generic[i])*1e-4, 1e-4));
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-  }
-}
-
-
-#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
deleted file mode 100644
index 63b5fdadb..000000000
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index 4980c982a..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_deinterleave_real_16s_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_16s_aligned16::t1() {
-
-  
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index 02050926f..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index 3c3f737a1..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* LV_HAVE_SSE */
-
-#else
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> *input0;
-  
-  float* output_generic;
-  float* output_sse;
-  float* output_sse4_1;
-
-  ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t));
-  ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0);
-  }
-
-  printf("8sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-
-  free(input0);
-  free(output_generic);
-  free(output_sse);
-  free(output_sse4_1);
-}
-
-#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index 93338e488..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
deleted file mode 100644
index a33d1bf30..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_8sc_deinterleave_real_8s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
deleted file mode 100644
index 92fc0dd4a..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
deleted file mode 100644
index 216bf1cef..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctime>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<int8_t>* input;
-  std::complex<int8_t>* taps;
-  
-  std::complex<int16_t>* result_generic;
-  std::complex<int16_t>* result_sse4_1;
-  int i;
-  int8_t* inputInt8_T;
-  int8_t* tapsInt8_T;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t));
-  
-  inputInt8_T = (int8_t*)input;
-  tapsInt8_T = (int8_t*)taps;
-  for(int i = 0; i < vlen*2; ++i) {   
-    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  
-  printf("8sc_multiply_conjugate_16sc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex<int16_t>*)result_generic, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex<int16_t>*)result_sse4_1, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i]));
-
-    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
deleted file mode 100644
index 0e78a5eca..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
-#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
deleted file mode 100644
index 4c707446e..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctime>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<int8_t>* input;
-  std::complex<int8_t>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse4_1;
-  int i;
-  int8_t* inputInt8_T;
-  int8_t* tapsInt8_T;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float));
-  
-
-  inputInt8_T = (int8_t*)input;
-  tapsInt8_T = (int8_t*)taps;
-  for(int i = 0; i < vlen*2; ++i) {   
-    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  
-  printf("8sc_multiply_conjugate_32fc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i]));
-    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
deleted file mode 100644
index eb9ae309c..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
-#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
deleted file mode 100644
index 8e7e59768..000000000
--- a/volk/lib/qa_volk.cc
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-/*
- * This class gathers together all the test cases for the example
- * directory into a single test suite.  As you create new test cases,
- * add them here.
- */
-
-#include <qa_volk.h>
-#include <qa_16s_quad_max_star_aligned16.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <qa_32fc_index_max_aligned16.h>
-#include <qa_32f_index_max_aligned16.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <qa_16s_permute_and_scalar_add_aligned16.h>
-#include <qa_16s_branch_4_state_8_aligned16.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <qa_16s_add_quad_aligned16.h>
-#include <qa_32f_add_aligned16.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <qa_32f_max_aligned16.h>
-#include <qa_32f_min_aligned16.h>
-#include <qa_64f_max_aligned16.h>
-#include <qa_64f_min_aligned16.h>
-#include <qa_32s_and_aligned16.h>
-#include <qa_32s_or_aligned16.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <qa_32f_fm_detect_aligned16.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <qa_32f_divide_aligned16.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <qa_32u_popcnt_aligned16.h>
-#include <qa_64u_popcnt_aligned16.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <qa_32f_power_aligned16.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <qa_32fc_power_spectral_density_32f_aligned16.h> 
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-
-CppUnit::TestSuite *
-qa_volk::suite()
-{
-  CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
-  s->addTest(qa_16s_quad_max_star_aligned16::suite());
-  s->addTest(qa_32fc_dot_prod_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_aligned16::suite());
-  s->addTest(qa_32f_sum_of_poly_aligned16::suite());
-  s->addTest(qa_32fc_index_max_aligned16::suite());
-  s->addTest(qa_32f_index_max_aligned16::suite());
-  s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
-  s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
-  s->addTest(qa_16s_branch_4_state_8_aligned16::suite());
-  s->addTest(qa_16s_max_star_horizontal_aligned16::suite());
-  s->addTest(qa_16s_max_star_aligned16::suite());
-  s->addTest(qa_16s_add_quad_aligned16::suite());
-  s->addTest(qa_32f_add_aligned16::suite());
-  s->addTest(qa_32f_subtract_aligned16::suite());
-  s->addTest(qa_32f_max_aligned16::suite());
-  s->addTest(qa_32f_min_aligned16::suite());
-  s->addTest(qa_64f_max_aligned16::suite());
-  s->addTest(qa_64f_min_aligned16::suite());
-  s->addTest(qa_32s_and_aligned16::suite());
-  s->addTest(qa_32s_or_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_unaligned16::suite());
-  s->addTest(qa_32f_fm_detect_aligned16::suite());
-  //s->addTest(qa_32fc_32f_multiply_aligned16::suite());
-  s->addTest(qa_32fc_multiply_aligned16::suite());
-  s->addTest(qa_32f_divide_aligned16::suite());
-  s->addTest(qa_32f_multiply_aligned16::suite());
-  s->addTest(qa_32f_sqrt_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite());
-  s->addTest(qa_32u_popcnt_aligned16::suite());
-  s->addTest(qa_64u_popcnt_aligned16::suite());
-  s->addTest(qa_16u_byteswap_aligned16::suite());
-  s->addTest(qa_32u_byteswap_aligned16::suite());
-  s->addTest(qa_64u_byteswap_aligned16::suite());
-  s->addTest(qa_32f_normalize_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_64f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32f_interleave_16sc_aligned16::suite());
-  s->addTest(qa_32f_interleave_32fc_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_unaligned16::suite());
-  s->addTest(qa_16s_convert_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_8s_unaligned16::suite());
-  s->addTest(qa_32f_convert_16s_aligned16::suite());
-  s->addTest(qa_32f_convert_16s_unaligned16::suite());
-  s->addTest(qa_32f_convert_32s_aligned16::suite());
-  s->addTest(qa_32f_convert_32s_unaligned16::suite());
-  s->addTest(qa_32f_convert_64f_aligned16::suite());
-  s->addTest(qa_32f_convert_64f_unaligned16::suite());
-  s->addTest(qa_32f_convert_8s_aligned16::suite());
-  s->addTest(qa_32f_convert_8s_unaligned16::suite());
-  s->addTest(qa_32s_convert_32f_aligned16::suite());
-  s->addTest(qa_32s_convert_32f_unaligned16::suite());
-  s->addTest(qa_64f_convert_32f_aligned16::suite());
-  s->addTest(qa_64f_convert_32f_unaligned16::suite());
-  s->addTest(qa_8s_convert_16s_aligned16::suite());
-  s->addTest(qa_8s_convert_16s_unaligned16::suite());
-  s->addTest(qa_8s_convert_32f_aligned16::suite());
-  s->addTest(qa_8s_convert_32f_unaligned16::suite());
-  s->addTest(qa_32fc_32f_power_32fc_aligned16::suite());
-  s->addTest(qa_32f_power_aligned16::suite());
-  s->addTest(qa_32fc_atan2_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite());
-  s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite());
-  s->addTest(qa_32f_accumulator_aligned16::suite());
-  s->addTest(qa_32f_stddev_aligned16::suite());
-  s->addTest(qa_32f_stddev_and_mean_aligned16::suite());
-
-  return s;
-}
diff --git a/volk/lib/qa_volk.h b/volk/lib/qa_volk.h
deleted file mode 100644
index 43fa7faba..000000000
--- a/volk/lib/qa_volk.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Example Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Example Public License for more details.
- * 
- * You should have received a copy of the GNU Example Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_QA_VOLK_H
-#define INCLUDED_QA_VOLK_H
-
-#include <cppunit/TestSuite.h>
-
-//! collect all the tests for the example directory
-
-class qa_volk {
- public:
-  //! return suite of tests for all of example directory
-  static CppUnit::TestSuite *suite ();
-};
-
-#endif /* INCLUDED_QA_VOLK_H */
diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc
deleted file mode 100644
index 50ac08eab..000000000
--- a/volk/lib/test_all.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2002,2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <cppunit/ui/text/TestRunner.h>
-#include <cppunit/TextTestRunner.h>
-
-#include <qa_volk.h>
-
-#include <cppunit/XmlOutputter.h>
-#include <iostream>
-#include <getopt.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string>
-#include <fstream>
-
-int 
-main (int argc, char **argv)
-{
-  
-  int opt = 0;
-  std::string xmlOutputFile("");
-
-  while( (opt = getopt(argc, argv, "o:")) != -1){
-    switch(opt){
-    case 'o':
-      if(optarg){
-	xmlOutputFile.assign(optarg);
-      }
-      else{
-	std::cerr << "No xml file output specified for -o" << std::endl;
-	exit(EXIT_FAILURE);
-      }
-      break;
-
-    default: /* '?' */
-      fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n",
-	      argv[0]);
-      exit(EXIT_FAILURE);
-    }
-
-  }
-
-  CppUnit::TextUi::TestRunner runner;
-
-  runner.addTest (qa_volk::suite ());
-
-  bool was_successful = false;
-  if(!xmlOutputFile.empty()){
-    std::ofstream xmlOutput(xmlOutputFile.c_str());
-    if(xmlOutput.is_open()){
-      runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput));
-
-      was_successful = runner.run("", false, true, false);
-    }
-    xmlOutput.close();
-  }
-  else{
-    was_successful = runner.run ("", false);
-  }
-
-  return was_successful ? 0 : 1;
-}
-- 
cgit 


From f832c9789be9fec46e211be4fb2355013d19c000 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 18:24:02 -0800
Subject: Volk: Small changes to speed things up.

---
 volk/lib/qa_utils.cc | 2 +-
 volk/lib/testqa.cc   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 67ce5ddef..9cafd459f 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -32,7 +32,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
         if(type.size == 8) random_floats<double>((double *)data, n);
         else random_floats<float>((float *)data, n);
     } else {
-        float int_max = pow(2, type.size*8);
+        float int_max = float(uint64_t(2) << (type.size*8));
         if(type.is_signed) int_max /= 2.0;
         for(int i=0; i<n; i++) {
             float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 9f4934dc0..4cef7b443 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000);
-- 
cgit 


From b0a23e876fe0f92afb2c55fd4fbce6427e9598d8 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 25 Jan 2011 15:06:23 -0800
Subject: Volk: doesn't test a routine if no valid architectures other than
 generic are found

---
 volk/lib/qa_utils.cc | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 9cafd459f..6a6f87d85 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -288,6 +288,11 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     //first let's get a list of available architectures for the test
     std::vector<std::string> arch_list = get_arch_list(archs);
     
+    if(arch_list.size() < 2) {
+        std::cout << "no architectures to test" << std::endl;
+        return false;
+    }
+    
     //now we have to get a function signature by parsing the name
     std::vector<volk_type_t> inputsig, outputsig;
     get_signatures_from_name(inputsig, outputsig, name);
-- 
cgit 


From e979880d446949b2d2a93087011579c383369819 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Thu, 13 Jan 2011 18:57:48 +0000
Subject: Volk: QA util has proper free().

---
 volk/lib/qa_utils.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 6a6f87d85..e85e2c1bc 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -309,10 +309,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
     //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
     std::vector<void *> inbuffs;
-
+    std::vector<void *> free_buffs; //this is just a list of void*'s that i'll have to free later.
+                                    //we need it because we dupe void*s in test_data below.
     make_buffer_for_signature(inbuffs, inputsig, vlen);
     for(int i=0; i<inbuffs.size(); i++) {
-        load_random_data(inbuffs[i], inputsig[i], vlen);        
+        load_random_data(inbuffs[i], inputsig[i], vlen);   
+        free_buffs.push_back(inbuffs[i]);
     }
     
     //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
@@ -321,6 +323,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         std::vector<void *> arch_buffs;
         for(int j=0; j<outputsig.size(); j++) {
             arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
+            free_buffs.push_back(arch_buffs.back());
         }
         for(int j=0; j<inputsig.size(); j++) {
             arch_buffs.push_back(inbuffs[j]);
@@ -433,6 +436,11 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
             }
         }
     }
+
+    BOOST_FOREACH(void *buf, free_buffs) {
+        free(buf);
+    }
+
     return fail_global;
 }
 
-- 
cgit 


From 060df0d1fe23c07a0ba2f0242f22073dc62626c1 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 25 Jan 2011 16:28:21 -0800
Subject: Volk: uses m4 magic to find boost_unit_test_framework

---
 volk/lib/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index bbc993fa2..afd29a352 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -121,7 +121,7 @@ noinst_PROGRAMS = \
 
 testqa_SOURCES = testqa.cc qa_utils.cc
 testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
-testqa_LDFLAGS = -lboost_unit_test_framework
+testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
 if LV_HAVE_ORC
 testqa_LDADD  = \
 	libvolk.la \
-- 
cgit 


From 2a4c4f89187bf75caa34c7bc52fc32310a75c9f2 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 26 Jan 2011 15:28:35 -0800
Subject: Volk: fixed volk_8i_s32f_convert_32f_a16_orc_impl.

---
 volk/lib/testqa.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 4cef7b443..d6b9e347d 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -49,8 +49,8 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000);
@@ -60,7 +60,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000);
-- 
cgit 


From 5ebd9ef2580aa36cd3a636c6257bd4b80b2380f8 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 26 Jan 2011 15:44:40 -0800
Subject: Volk: find built headers instead of installed ones

---
 volk/lib/Makefile.am | 2 +-
 volk/lib/testqa.cc   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index afd29a352..6f3d7fd86 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -120,7 +120,7 @@ noinst_PROGRAMS = \
 	testqa
 
 testqa_SOURCES = testqa.cc qa_utils.cc
-testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
+testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS)
 testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
 if LV_HAVE_ORC
 testqa_LDADD  = \
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index d6b9e347d..e9734411b 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -1,6 +1,6 @@
 #include "qa_utils.h"
-#include "../include/volk/volk.h"
-#include "../include/volk/volk_registry.h"
+#include <volk/volk.h>
+#include <volk/volk_registry.h>
 #include <boost/test/unit_test.hpp>
 
 BOOST_AUTO_TEST_CASE(volk_test_all) {    
-- 
cgit 


From e34a484084a5224ec3412bd7d6c6f285301f5d43 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 26 Jan 2011 15:47:56 -0800
Subject: Volk: renamed volk_32fc_32f_power_32fc_a16 to
 volk_32fc_s32f_power_32fc_a16

---
 volk/lib/testqa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index e9734411b..f33670856 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -29,7 +29,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
     VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
-- 
cgit 


From 6503e3b21978b71908400c994148836bec4a97b9 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 30 Jan 2011 12:35:07 -0500
Subject: volk: Updating build structure to work when orc is not installed.

Distcheck passes for me if liborc is installed or not.
---
 volk/lib/Makefile.am | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 6f3d7fd86..af7c7f335 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -45,7 +45,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
 
 
 # list of programs run by "make check" and "make distcheck"
-TESTS = testqa
+#TESTS = testqa
 #orc stuff gets built in the ORC directory conditional to ORC being enabled.
 #it gets linked in during the build of libvolk as an added library.
 #there might be a better way to do this.
@@ -77,7 +77,7 @@ libvolk_la_SOURCES = 		\
 volk_orc_LDFLAGS = \
 	$(ORC_LDFLAGS) \
 	-lorc-0.4
-	
+
 volk_orc_LIBADD = \
 	../orc/libvolk_orc.la
 
@@ -103,7 +103,6 @@ endif
 #libvolk_qa_la_LIBADD = \
 #	libvolk.la \
 #	libvolk_runtime.la
-	
 
 # ----------------------------------------------------------------
 # headers that don't get installed
-- 
cgit 


From 736874202f15222fa3ec10ceeb1815e8a595ed3a Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 2 Feb 2011 13:55:15 -0500
Subject: volk: cleaning up makefile issues after merge.

---
 volk/lib/Makefile.am | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index af7c7f335..3e5502369 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -57,7 +57,8 @@ lib_LTLIBRARIES = \
 EXTRA_DIST = \
 	volk_mktables.c		\
 	volk_rank_archs.h 	\
-	volk_proccpu_sim.c
+	volk_proccpu_sim.c	\
+	gcc_x86_cpuid.h
 
 # ----------------------------------------------------------------
 #                      The main library
@@ -109,8 +110,7 @@ endif
 # ----------------------------------------------------------------
 noinst_HEADERS = \
 	volk_init.h \
-	qa_utils.h \
-	assembly.h
+	qa_utils.h
 
 # ----------------------------------------------------------------
 # Our test program
-- 
cgit 


From b806f6e95cd917e54884841c8e7928204ecd78f8 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 2 Feb 2011 14:21:46 -0500
Subject: volk: updating to readd unaligned dot product under new name scheme.

---
 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc | 138 ---------------
 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h  |  18 --
 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc | 138 +++++++++++++++
 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h  |  18 ++
 volk/lib/qa_volk.cc                              | 213 -----------------------
 volk/lib/testqa.cc                               |   1 +
 6 files changed, 157 insertions(+), 369 deletions(-)
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
 create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
 create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
 delete mode 100644 volk/lib/qa_volk.cc

(limited to 'volk/lib')

diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
deleted file mode 100644
index a0680bab6..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_conjugate_dot_prod_unaligned.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_unaligned::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse");
-
-  printf("32fc_conjugate_dot_prod_unaligned\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#elif LV_HAVE_SSE && LV_HAVE_32
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_unaligned::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse_32");
-
-  printf("32fc_conjugate_dot_prod_unaligned\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#else
-
-void qa_32fc_conjugate_dot_prod_unaligned::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
deleted file mode 100644
index 7aead53a1..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
-#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
new file mode 100644
index 000000000..fefdf06ee
--- /dev/null
+++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
@@ -0,0 +1,138 @@
+#include <volk/volk.h>
+#include <qa_32fc_x2_conjugate_dot_prod_32fc_u.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+
+#define assertcomplexEqual(expected, actual, delta)			\
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+
+#define	ERR_DELTA	(1e-4)
+
+//test for sse
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+static void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform () * 32767;
+}
+
+
+void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
+  const int vlen = 789743;
+
+  volk_environment_init();
+  int ret;
+
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result;
+
+  ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret = posix_memalign((void**)&taps, 16, vlen << 3);
+  ret = posix_memalign((void**)&result_generic, 16, 8);
+  ret = posix_memalign((void**)&result, 16, 8);
+  
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  
+
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8,  "generic");
+
+  
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse");
+
+  printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+  
+}
+
+
+#elif LV_HAVE_SSE && LV_HAVE_32
+
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+static void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform () * 32767;
+}
+
+
+void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
+  const int vlen = 789743;
+  
+  volk_environment_init();
+  int ret;
+
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result;
+
+  ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret = posix_memalign((void**)&taps, 16, vlen << 3);
+  ret = posix_memalign((void**)&result_generic, 16, 8);
+  ret = posix_memalign((void**)&result, 16, 8);
+  
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  
+
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8,  "generic");
+
+  
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32");
+
+  printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+  
+}
+
+
+#else
+
+void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
new file mode 100644
index 000000000..f07402403
--- /dev/null
+++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H
+#define INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_x2_conjugate_dot_prod_32fc_u : public CppUnit::TestCase {
+
+  CPPUNIT_TEST_SUITE (qa_32fc_x2_conjugate_dot_prod_32fc_u);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
deleted file mode 100644
index 98d3e9728..000000000
--- a/volk/lib/qa_volk.cc
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-/*
- * This class gathers together all the test cases for the example
- * directory into a single test suite.  As you create new test cases,
- * add them here.
- */
-
-#include <qa_volk.h>
-#include <qa_16s_quad_max_star_aligned16.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <qa_32fc_index_max_aligned16.h>
-#include <qa_32f_index_max_aligned16.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <qa_32fc_conjugate_dot_prod_unaligned.h>
-#include <qa_16s_permute_and_scalar_add_aligned16.h>
-#include <qa_16s_branch_4_state_8_aligned16.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <qa_16s_add_quad_aligned16.h>
-#include <qa_32f_add_aligned16.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <qa_32f_max_aligned16.h>
-#include <qa_32f_min_aligned16.h>
-#include <qa_64f_max_aligned16.h>
-#include <qa_64f_min_aligned16.h>
-#include <qa_32s_and_aligned16.h>
-#include <qa_32s_or_aligned16.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <qa_32f_fm_detect_aligned16.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <qa_32f_divide_aligned16.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <qa_32u_popcnt_aligned16.h>
-#include <qa_64u_popcnt_aligned16.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <qa_32f_power_aligned16.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <qa_32fc_power_spectral_density_32f_aligned16.h> 
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-
-CppUnit::TestSuite *
-qa_volk::suite()
-{
-  CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
-  s->addTest(qa_16s_quad_max_star_aligned16::suite());
-  s->addTest(qa_32fc_dot_prod_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_aligned16::suite());
-  s->addTest(qa_32f_sum_of_poly_aligned16::suite());
-  s->addTest(qa_32fc_index_max_aligned16::suite());
-  s->addTest(qa_32f_index_max_aligned16::suite());
-  s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
-  s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite());
-  s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
-  s->addTest(qa_16s_branch_4_state_8_aligned16::suite());
-  s->addTest(qa_16s_max_star_horizontal_aligned16::suite());
-  s->addTest(qa_16s_max_star_aligned16::suite());
-  s->addTest(qa_16s_add_quad_aligned16::suite());
-  s->addTest(qa_32f_add_aligned16::suite());
-  s->addTest(qa_32f_subtract_aligned16::suite());
-  s->addTest(qa_32f_max_aligned16::suite());
-  s->addTest(qa_32f_min_aligned16::suite());
-  s->addTest(qa_64f_max_aligned16::suite());
-  s->addTest(qa_64f_min_aligned16::suite());
-  s->addTest(qa_32s_and_aligned16::suite());
-  s->addTest(qa_32s_or_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_unaligned16::suite());
-  s->addTest(qa_32f_fm_detect_aligned16::suite());
-  s->addTest(qa_32fc_32f_multiply_aligned16::suite());
-  s->addTest(qa_32fc_multiply_aligned16::suite());
-  s->addTest(qa_32f_divide_aligned16::suite());
-  s->addTest(qa_32f_multiply_aligned16::suite());
-  s->addTest(qa_32f_sqrt_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite());
-  s->addTest(qa_32u_popcnt_aligned16::suite());
-  s->addTest(qa_64u_popcnt_aligned16::suite());
-  s->addTest(qa_16u_byteswap_aligned16::suite());
-  s->addTest(qa_32u_byteswap_aligned16::suite());
-  s->addTest(qa_64u_byteswap_aligned16::suite());
-  s->addTest(qa_32f_normalize_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_64f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32f_interleave_16sc_aligned16::suite());
-  s->addTest(qa_32f_interleave_32fc_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_unaligned16::suite());
-  s->addTest(qa_16s_convert_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_8s_unaligned16::suite());
-  s->addTest(qa_32f_convert_16s_aligned16::suite());
-  s->addTest(qa_32f_convert_16s_unaligned16::suite());
-  s->addTest(qa_32f_convert_32s_aligned16::suite());
-  s->addTest(qa_32f_convert_32s_unaligned16::suite());
-  s->addTest(qa_32f_convert_64f_aligned16::suite());
-  s->addTest(qa_32f_convert_64f_unaligned16::suite());
-  s->addTest(qa_32f_convert_8s_aligned16::suite());
-  s->addTest(qa_32f_convert_8s_unaligned16::suite());
-  s->addTest(qa_32s_convert_32f_aligned16::suite());
-  s->addTest(qa_32s_convert_32f_unaligned16::suite());
-  s->addTest(qa_64f_convert_32f_aligned16::suite());
-  s->addTest(qa_64f_convert_32f_unaligned16::suite());
-  s->addTest(qa_8s_convert_16s_aligned16::suite());
-  s->addTest(qa_8s_convert_16s_unaligned16::suite());
-  s->addTest(qa_8s_convert_32f_aligned16::suite());
-  s->addTest(qa_8s_convert_32f_unaligned16::suite());
-  s->addTest(qa_32fc_32f_power_32fc_aligned16::suite());
-  s->addTest(qa_32f_power_aligned16::suite());
-  s->addTest(qa_32fc_atan2_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite());
-  s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite());
-  s->addTest(qa_32f_accumulator_aligned16::suite());
-  s->addTest(qa_32f_stddev_aligned16::suite());
-  s->addTest(qa_32f_stddev_and_mean_aligned16::suite());
-
-  return s;
-}
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f33670856..779bc61eb 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000);
-- 
cgit