7 files changed, 547 insertions, 181 deletions
diff --git a/volk/lib/assembly.h b/volk/lib/assembly.h
deleted file mode 100644
index 8a99aa07c..000000000
--- a/volk/lib/assembly.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2002 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef _ASSEMBLY_H_
-#define _ASSEMBLY_H_
-
-#if defined (__APPLE__) && defined (__APPLE_CC__)
-
-// XCode ignores the .scl and .type functions in XCode 2.2.1 and 2.3,
-// but creates an error in XCode 2.4.  Just ignore them.
-
-#define GLOB_SYMB(f)    _ ## f
-
-#define DEF_FUNC_HEAD(f)  /* none */
-
-#define FUNC_TAIL(f)    /* none*/
-
-#elif !defined (__ELF__)
-
-/*
- * Too bad, the following define does not work as expected --SF
- * 	#define GLOB_SYMB(f)	__USER_LABEL_PREFIX__ ## f
- */
-#define GLOB_SYMB(f)	_ ## f
-
-#define DEF_FUNC_HEAD(f)	\
-	.def	GLOB_SYMB(f); .scl 2; .type 32; .endef
-
-#define FUNC_TAIL(f)	/* none */
-
-
-#else	/* !__ELF__ */
-
-
-#define GLOB_SYMB(f)	f
-
-#define DEF_FUNC_HEAD(f)	\
-	.type	GLOB_SYMB(f),@function	\
-
-#define FUNC_TAIL(f)	\
-  .Lfe1:		\
-	.size	GLOB_SYMB(f),.Lfe1-GLOB_SYMB(f)
-
-
-#endif	/* !__ELF__ */
-
-
-#endif /* _ASSEMBLY_H_ */
diff --git a/volk/lib/cpuid_x86.S b/volk/lib/cpuid_x86.S
deleted file mode 100644
index 4e1a9404f..000000000
--- a/volk/lib/cpuid_x86.S
+++ /dev/null
@@ -1,60 +0,0 @@
-#	
-# Copyright 2003 Free Software Foundation, Inc.
-# 
-# This file is part of GNU Radio
-# 
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-# 
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-# 
-
-#
-# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result
-#
-#  void cpuid_x86 (unsigned int op, unsigned int result[4]);
-#
-
-#include "assembly.h"
-
-.file "cpuid_x86.S"
-	.version	"01.01"
-.text
-.globl	GLOB_SYMB(cpuid_x86)
-	DEF_FUNC_HEAD(cpuid_x86)
-GLOB_SYMB(cpuid_x86):
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx		# must save in PIC mode, holds GOT pointer
-	pushl	%esi
-	
-	movl	8(%ebp), %eax	# op
-	movl	12(%ebp), %esi	# result
-	cpuid
-	movl	%eax, 0(%esi)
-	movl	%ebx, 4(%esi)
-	movl	%ecx, 8(%esi)
-	movl	%edx, 12(%esi)
-	
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-
-FUNC_TAIL(cpuid_x86)
-	.ident	"Hand coded cpuid assembly"
-	
-
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/volk/lib/cpuid_x86_64.S b/volk/lib/cpuid_x86_64.S
deleted file mode 100644
index 32b1847cd..000000000
--- a/volk/lib/cpuid_x86_64.S
+++ /dev/null
@@ -1,54 +0,0 @@
-#	
-# Copyright 2003,2005 Free Software Foundation, Inc.
-# 
-# This file is part of GNU Radio
-# 
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-# 
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-# 
-
-#
-# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result
-#
-#  void cpuid_x86 (unsigned int op, unsigned int result[4]);
-#
-
-#include "assembly.h"
-
-.file "cpuid_x86_64.S"
-	.version	"01.01"
-.text
-.globl	GLOB_SYMB(cpuid_x86)
-	DEF_FUNC_HEAD(cpuid_x86)
-GLOB_SYMB(cpuid_x86):
-	mov	%rbx, %r11	# must save in PIC mode, holds GOT pointer
-	
-	mov	%rdi, %rax	# op
-	cpuid
-	movl	%eax, 0(%rsi)	# result
-	movl	%ebx, 4(%rsi)
-	movl	%ecx, 8(%rsi)
-	movl	%edx, 12(%rsi)
-	
-	mov	%r11, %rbx
-	retq
-
-FUNC_TAIL(cpuid_x86)
-	.ident	"Hand coded cpuid64 assembly"
-	
-
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
new file mode 100644
index 000000000..2d0916fb3
--- /dev/null
+++ b/volk/lib/gcc_x86_cpuid.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ * 
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ * 
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Single-bit masks for the feature flags cpuid reports in %ecx.  */
+#define bit_SSE3	(1 << 0)
+#define bit_PCLMUL	(1 << 1)
+#define bit_SSSE3	(1 << 9)
+#define bit_FMA		(1 << 12)
+#define bit_CMPXCHG16B	(1 << 13)
+#define bit_SSE4_1	(1 << 19)
+#define bit_SSE4_2	(1 << 20)
+#define bit_MOVBE	(1 << 22)
+#define bit_POPCNT	(1 << 23)
+#define bit_AES		(1 << 25)
+#define bit_XSAVE	(1 << 26)
+#define bit_OSXSAVE	(1 << 27)
+#define bit_AVX		(1 << 28)
+
+/* Single-bit masks for the feature flags cpuid reports in %edx.  */
+#define bit_CMPXCHG8B	(1 << 8)
+#define bit_CMOV	(1 << 15)
+#define bit_MMX		(1 << 23)
+#define bit_FXSAVE	(1 << 24)
+#define bit_SSE		(1 << 25)
+#define bit_SSE2	(1 << 26)
+
+/* Extended Features */
+/* Masks for the extended feature flags reported in %ecx.  */
+#define bit_LAHF_LM	(1 << 0)
+#define bit_SSE4a	(1 << 6)
+#define bit_SSE5	(1 << 11)
+
+/* Masks for the extended feature flags reported in %edx.  */
+#define bit_LM		(1 << 29)
+#define bit_3DNOWP	(1 << 30)
+#define bit_3DNOW	(1u << 31)	/* unsigned: 1 << 31 overflows a signed int */
+
+/* __cpuid / __cpuid_count: execute cpuid with LEVEL in %eax (and COUNT in %ecx) and store %eax, %ebx, %ecx, %edx in a, b, c, d.  */
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx may be the PIC register: cpuid's %ebx result is exchanged into operand %1 (b) and the original %ebx is exchanged back, so %ebx is never named as an output.  */
+#if __GNUC__ >= 3
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchg{l}\t{%%}ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+/* Same, with count tied to operand 2 (%ecx) as an additional input.  */
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchg{l}\t{%%}ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#else
+/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
+   nor alternatives in i386 code.  */
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("xchgl\t%%ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchgl\t%%ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+/* Same, with count tied to operand 2 (%ecx) as an additional input.  */
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("xchgl\t%%ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchgl\t%%ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#endif /* __GNUC__ >= 3 */
+#else /* not i386 PIC: %ebx is named directly as the output for b */
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("cpuid\n\t"					\
+	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+/* Same, with count tied to operand 2 (%ecx) as an additional input.  */
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("cpuid\n\t"					\
+	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#endif /* __i386__ && __PIC__ */
+
+/* Return highest supported input value for cpuid instruction.  ext can
+   be either 0x0 or 0x80000000 to return highest supported value for
+   basic or extended cpuid information.  Function returns 0 if cpuid
+   is not supported or whatever cpuid returns in eax register.  If sig
+   pointer is non-null, then first four bytes of the signature
+   (as found in ebx register) are returned in location pointed by sig.  */
+
+static __inline unsigned int
+__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
+{
+  unsigned int __eax, __ebx, __ecx, __edx;
+
+#ifndef __x86_64__
+#if __GNUC__ >= 3
+  /* See if we can use cpuid.  On AMD64 we always can.  */
+  __asm__ ("pushf{l|d}\n\t"			/* save the flags; restored by the final popf */
+	   "pushf{l|d}\n\t"			/* push a second copy ... */
+	   "pop{l}\t%0\n\t"			/* ... and pop it into __eax */
+	   "mov{l}\t{%0, %1|%1, %0}\n\t"	/* __ebx = original flags value */
+	   "xor{l}\t{%2, %0|%0, %2}\n\t"	/* toggle bit 0x00200000 in __eax */
+	   "push{l}\t%0\n\t"			/* write the modified value ... */
+	   "popf{l|d}\n\t"			/* ... into the flags register */
+	   "pushf{l|d}\n\t"			/* read the flags back ... */
+	   "pop{l}\t%0\n\t"			/* ... into __eax */
+	   "popf{l|d}\n\t"			/* restore the flags saved first */
+	   : "=&r" (__eax), "=&r" (__ebx)
+	   : "i" (0x00200000));
+#else
+/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
+   nor alternatives in i386 code.  Same probe, AT&T syntax only.  */
+  __asm__ ("pushfl\n\t"
+	   "pushfl\n\t"
+	   "popl\t%0\n\t"
+	   "movl\t%0, %1\n\t"
+	   "xorl\t%2, %0\n\t"
+	   "pushl\t%0\n\t"
+	   "popfl\n\t"
+	   "pushfl\n\t"
+	   "popl\t%0\n\t"
+	   "popfl\n\t"
+	   : "=&r" (__eax), "=&r" (__ebx)
+	   : "i" (0x00200000));
+#endif /* __GNUC__ >= 3 */
+
+  if (!((__eax ^ __ebx) & 0x00200000))	/* the bit did not change: cpuid is not supported */
+    return 0;
+#endif /* !__x86_64__ */
+
+  /* Host supports cpuid.  Return highest supported cpuid input value.  */
+  __cpuid (__ext, __eax, __ebx, __ecx, __edx);
+
+  if (__sig)	/* the signature output is optional */
+    *__sig = __ebx;
+
+  return __eax;
+}
+
+/* Return cpuid data for the requested level in *__eax, *__ebx, *__ecx and
+   *__edx.  Returns 1 on success.  Returns 0, leaving the outputs untouched,
+   if cpuid is unavailable (__get_cpuid_max reports 0) or the level exceeds
+   the reported maximum.  All pointers are required to be non-null.  */
+
+static __inline int
+__get_cpuid (unsigned int __level,
+	     unsigned int *__eax, unsigned int *__ebx,
+	     unsigned int *__ecx, unsigned int *__edx)
+{
+  unsigned int __ext = __level & 0x80000000;
+  unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
+
+  if (__maxlevel == 0 || __maxlevel < __level)	/* 0 also covers __level == 0 without cpuid */
+    return 0;
+  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
+  return 1;
+}
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
new file mode 100644
index 000000000..a0680bab6
--- /dev/null
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
@@ -0,0 +1,138 @@
+#include <volk/volk.h>
+#include <qa_32fc_conjugate_dot_prod_unaligned.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+// Asserts that the real and imaginary parts of actual each lie within |expected part| * delta of expected; one statement, safe under an unbraced if.
+#define assertcomplexEqual(expected, actual, delta)			\
+  do { CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+       CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+
+#define	ERR_DELTA	(1e-4)
+
+//test for sse
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+// Pseudo-random value in [-1, 1], derived from rand().
+static float uniform() { return ((float) rand() / RAND_MAX - 0.5) * 2.0; }
+
+// Fills buf[0] .. buf[n-1] with uniform() values scaled by 32767.
+static void random_floats (float *buf, unsigned n)
+{
+  float *const end = buf + n;
+  for (float *slot = buf; slot != end; ++slot)
+    *slot = uniform () * 32767;
+}
+
+// Feeds identical random data to the "generic" and "sse" kernels and requires matching results.
+void qa_32fc_conjugate_dot_prod_unaligned::t1() {
+  const int vlen = 789743;
+  const int buf_bytes = vlen << 3;	// 8 bytes per std::complex<float> sample
+
+  volk_environment_init();
+
+  // Start from NULL so the cleanup below is safe even if an allocation fails.
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result = NULL;
+
+  int ret = posix_memalign((void**)&input, 16, buf_bytes);
+  if (ret == 0) ret = posix_memalign((void**)&taps, 16, buf_bytes);
+  if (ret == 0) ret = posix_memalign((void**)&result_generic, 16, 8);
+  if (ret == 0) ret = posix_memalign((void**)&result, 16, 8);
+  if (ret != 0) {	// previously ignored: the test carried on with invalid pointers
+    free(input); free(taps); free(result_generic); free(result);
+    CPPUNIT_FAIL("posix_memalign failed");
+  }
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, buf_bytes,  "generic");
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, buf_bytes, "sse");
+
+  // Copy the results and release every buffer before asserting: a failed assertion throws.
+  const std::complex<float> expected = result_generic[0];
+  const std::complex<float> actual = result[0];
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+
+  printf("32fc_conjugate_dot_prod_unaligned\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(expected), std::imag(expected), std::real(actual), std::imag(actual));
+  assertcomplexEqual(expected, actual, ERR_DELTA);
+}
+
+
+#elif LV_HAVE_SSE && LV_HAVE_32
+
+// Pseudo-random value in [-1, 1], derived from rand().
+static float uniform() { return ((float) rand() / RAND_MAX - 0.5) * 2.0; }
+
+// Fills buf[0] .. buf[n-1] with uniform() values scaled by 32767.
+static void random_floats (float *buf, unsigned n)
+{
+  float *const end = buf + n;
+  for (float *slot = buf; slot != end; ++slot)
+    *slot = uniform () * 32767;
+}
+
+// Feeds identical random data to the "generic" and "sse_32" kernels and requires matching results.
+void qa_32fc_conjugate_dot_prod_unaligned::t1() {
+  const int vlen = 789743;
+  const int buf_bytes = vlen << 3;	// 8 bytes per std::complex<float> sample
+
+  volk_environment_init();
+
+  // Start from NULL so the cleanup below is safe even if an allocation fails.
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result = NULL;
+
+  int ret = posix_memalign((void**)&input, 16, buf_bytes);
+  if (ret == 0) ret = posix_memalign((void**)&taps, 16, buf_bytes);
+  if (ret == 0) ret = posix_memalign((void**)&result_generic, 16, 8);
+  if (ret == 0) ret = posix_memalign((void**)&result, 16, 8);
+  if (ret != 0) {	// previously ignored: the test carried on with invalid pointers
+    free(input); free(taps); free(result_generic); free(result);
+    CPPUNIT_FAIL("posix_memalign failed");
+  }
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, buf_bytes,  "generic");
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, buf_bytes, "sse_32");
+
+  // Copy the results and release every buffer before asserting: a failed assertion throws.
+  const std::complex<float> expected = result_generic[0];
+  const std::complex<float> actual = result[0];
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+
+  printf("32fc_conjugate_dot_prod_unaligned\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(expected), std::imag(expected), std::real(actual), std::imag(actual));
+  assertcomplexEqual(expected, actual, ERR_DELTA);
+}
+
+
+#else
+// Used when neither SSE configuration above is enabled: only reports that nothing was tested.
+void qa_32fc_conjugate_dot_prod_unaligned::t1() {
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
new file mode 100644
index 000000000..7aead53a1
--- /dev/null
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
+#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+// CppUnit test case for the 32fc_conjugate_dot_prod_unaligned kernel; its suite holds the single test t1.
+class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase {
+  // Suite declaration: registers t1 as the only test of this class.
+  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();	// defined in the matching .cc file
+};
+
+
+#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
new file mode 100644
index 000000000..98d3e9728
--- /dev/null
+++ b/volk/lib/qa_volk.cc
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * This class gathers together all the test cases for the volk
+ * library into a single test suite.  As you create new test cases,
+ * add them here.
+ */
+
+#include <qa_volk.h>
+#include <qa_16s_quad_max_star_aligned16.h>
+#include <qa_32fc_dot_prod_aligned16.h>
+#include <qa_32fc_square_dist_aligned16.h>
+#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
+#include <qa_32f_sum_of_poly_aligned16.h>
+#include <qa_32fc_index_max_aligned16.h>
+#include <qa_32f_index_max_aligned16.h>
+#include <qa_32fc_conjugate_dot_prod_aligned16.h>
+#include <qa_32fc_conjugate_dot_prod_unaligned.h>
+#include <qa_16s_permute_and_scalar_add_aligned16.h>
+#include <qa_16s_branch_4_state_8_aligned16.h>
+#include <qa_16s_max_star_horizontal_aligned16.h>
+#include <qa_16s_max_star_aligned16.h>
+#include <qa_16s_add_quad_aligned16.h>
+#include <qa_32f_add_aligned16.h>
+#include <qa_32f_subtract_aligned16.h>
+#include <qa_32f_max_aligned16.h>
+#include <qa_32f_min_aligned16.h>
+#include <qa_64f_max_aligned16.h>
+#include <qa_64f_min_aligned16.h>
+#include <qa_32s_and_aligned16.h>
+#include <qa_32s_or_aligned16.h>
+#include <qa_32f_dot_prod_aligned16.h>
+#include <qa_32f_dot_prod_unaligned16.h>
+#include <qa_32f_fm_detect_aligned16.h>
+#include <qa_32fc_32f_multiply_aligned16.h>
+#include <qa_32fc_multiply_aligned16.h>
+#include <qa_32f_divide_aligned16.h>
+#include <qa_32f_multiply_aligned16.h>
+#include <qa_32f_sqrt_aligned16.h>
+#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
+#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
+#include <qa_32u_popcnt_aligned16.h>
+#include <qa_64u_popcnt_aligned16.h>
+#include <qa_16u_byteswap_aligned16.h>
+#include <qa_32u_byteswap_aligned16.h>
+#include <qa_64u_byteswap_aligned16.h>
+#include <qa_32f_normalize_aligned16.h>
+#include <qa_16sc_deinterleave_16s_aligned16.h>
+#include <qa_16sc_deinterleave_32f_aligned16.h>
+#include <qa_16sc_deinterleave_real_16s_aligned16.h>
+#include <qa_16sc_deinterleave_real_32f_aligned16.h>
+#include <qa_16sc_deinterleave_real_8s_aligned16.h>
+#include <qa_16sc_magnitude_16s_aligned16.h>
+#include <qa_16sc_magnitude_32f_aligned16.h>
+#include <qa_32fc_deinterleave_32f_aligned16.h>
+#include <qa_32fc_deinterleave_64f_aligned16.h>
+#include <qa_32fc_deinterleave_real_16s_aligned16.h>
+#include <qa_32fc_deinterleave_real_32f_aligned16.h>
+#include <qa_32fc_deinterleave_real_64f_aligned16.h>
+#include <qa_32fc_magnitude_16s_aligned16.h>
+#include <qa_32fc_magnitude_32f_aligned16.h>
+#include <qa_32f_interleave_16sc_aligned16.h>
+#include <qa_32f_interleave_32fc_aligned16.h>
+#include <qa_8sc_deinterleave_16s_aligned16.h>
+#include <qa_8sc_deinterleave_32f_aligned16.h>
+#include <qa_8sc_deinterleave_real_16s_aligned16.h>
+#include <qa_8sc_deinterleave_real_32f_aligned16.h>
+#include <qa_8sc_deinterleave_real_8s_aligned16.h>
+#include <qa_16s_convert_32f_aligned16.h>
+#include <qa_16s_convert_32f_unaligned16.h>
+#include <qa_16s_convert_8s_aligned16.h>
+#include <qa_16s_convert_8s_unaligned16.h>
+#include <qa_32f_convert_16s_aligned16.h>
+#include <qa_32f_convert_16s_unaligned16.h>
+#include <qa_32f_convert_32s_aligned16.h>
+#include <qa_32f_convert_32s_unaligned16.h>
+#include <qa_32f_convert_64f_aligned16.h>
+#include <qa_32f_convert_64f_unaligned16.h>
+#include <qa_32f_convert_8s_aligned16.h>
+#include <qa_32f_convert_8s_unaligned16.h>
+#include <qa_32s_convert_32f_aligned16.h>
+#include <qa_32s_convert_32f_unaligned16.h>
+#include <qa_64f_convert_32f_aligned16.h>
+#include <qa_64f_convert_32f_unaligned16.h>
+#include <qa_8s_convert_16s_aligned16.h>
+#include <qa_8s_convert_16s_unaligned16.h>
+#include <qa_8s_convert_32f_aligned16.h>
+#include <qa_8s_convert_32f_unaligned16.h>
+#include <qa_32fc_32f_power_32fc_aligned16.h>
+#include <qa_32f_power_aligned16.h>
+#include <qa_32fc_atan2_32f_aligned16.h>
+#include <qa_32fc_power_spectral_density_32f_aligned16.h> 
+#include <qa_32fc_power_spectrum_32f_aligned16.h>
+#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
+#include <qa_32f_accumulator_aligned16.h>
+#include <qa_32f_stddev_aligned16.h>
+#include <qa_32f_stddev_and_mean_aligned16.h>
+// Builds the top-level "volk" suite by adding the suite of every qa_* test class listed below.
+CppUnit::TestSuite *
+qa_volk::suite()
+{
+  CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
+  // One addTest per qa_* class; new test classes must be appended here (and included above).
+  s->addTest(qa_16s_quad_max_star_aligned16::suite());
+  s->addTest(qa_32fc_dot_prod_aligned16::suite());
+  s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
+  s->addTest(qa_32fc_square_dist_aligned16::suite());
+  s->addTest(qa_32f_sum_of_poly_aligned16::suite());
+  s->addTest(qa_32fc_index_max_aligned16::suite());
+  s->addTest(qa_32f_index_max_aligned16::suite());
+  s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
+  s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite());
+  s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
+  s->addTest(qa_16s_branch_4_state_8_aligned16::suite());
+  s->addTest(qa_16s_max_star_horizontal_aligned16::suite());
+  s->addTest(qa_16s_max_star_aligned16::suite());
+  s->addTest(qa_16s_add_quad_aligned16::suite());
+  s->addTest(qa_32f_add_aligned16::suite());
+  s->addTest(qa_32f_subtract_aligned16::suite());
+  s->addTest(qa_32f_max_aligned16::suite());
+  s->addTest(qa_32f_min_aligned16::suite());
+  s->addTest(qa_64f_max_aligned16::suite());
+  s->addTest(qa_64f_min_aligned16::suite());
+  s->addTest(qa_32s_and_aligned16::suite());
+  s->addTest(qa_32s_or_aligned16::suite());
+  s->addTest(qa_32f_dot_prod_aligned16::suite());
+  s->addTest(qa_32f_dot_prod_unaligned16::suite());
+  s->addTest(qa_32f_fm_detect_aligned16::suite());
+  s->addTest(qa_32fc_32f_multiply_aligned16::suite());
+  s->addTest(qa_32fc_multiply_aligned16::suite());
+  s->addTest(qa_32f_divide_aligned16::suite());
+  s->addTest(qa_32f_multiply_aligned16::suite());
+  s->addTest(qa_32f_sqrt_aligned16::suite());
+  s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite());
+  s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite());
+  s->addTest(qa_32u_popcnt_aligned16::suite());
+  s->addTest(qa_64u_popcnt_aligned16::suite());
+  s->addTest(qa_16u_byteswap_aligned16::suite());
+  s->addTest(qa_32u_byteswap_aligned16::suite());
+  s->addTest(qa_64u_byteswap_aligned16::suite());
+  s->addTest(qa_32f_normalize_aligned16::suite());
+  s->addTest(qa_16sc_deinterleave_16s_aligned16::suite());
+  s->addTest(qa_16sc_deinterleave_32f_aligned16::suite());
+  s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite());
+  s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite());
+  s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite());
+  s->addTest(qa_16sc_magnitude_16s_aligned16::suite());
+  s->addTest(qa_16sc_magnitude_32f_aligned16::suite());
+  s->addTest(qa_32fc_deinterleave_32f_aligned16::suite());
+  s->addTest(qa_32fc_deinterleave_64f_aligned16::suite());
+  s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite());
+  s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite());
+  s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite());
+  s->addTest(qa_32fc_magnitude_16s_aligned16::suite());
+  s->addTest(qa_32fc_magnitude_32f_aligned16::suite());
+  s->addTest(qa_32f_interleave_16sc_aligned16::suite());
+  s->addTest(qa_32f_interleave_32fc_aligned16::suite());
+  s->addTest(qa_8sc_deinterleave_16s_aligned16::suite());
+  s->addTest(qa_8sc_deinterleave_32f_aligned16::suite());
+  s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite());
+  s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite());
+  s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite());
+  s->addTest(qa_16s_convert_32f_aligned16::suite());
+  s->addTest(qa_16s_convert_32f_unaligned16::suite());
+  s->addTest(qa_16s_convert_8s_aligned16::suite());
+  s->addTest(qa_16s_convert_8s_unaligned16::suite());
+  s->addTest(qa_32f_convert_16s_aligned16::suite());
+  s->addTest(qa_32f_convert_16s_unaligned16::suite());
+  s->addTest(qa_32f_convert_32s_aligned16::suite());
+  s->addTest(qa_32f_convert_32s_unaligned16::suite());
+  s->addTest(qa_32f_convert_64f_aligned16::suite());
+  s->addTest(qa_32f_convert_64f_unaligned16::suite());
+  s->addTest(qa_32f_convert_8s_aligned16::suite());
+  s->addTest(qa_32f_convert_8s_unaligned16::suite());
+  s->addTest(qa_32s_convert_32f_aligned16::suite());
+  s->addTest(qa_32s_convert_32f_unaligned16::suite());
+  s->addTest(qa_64f_convert_32f_aligned16::suite());
+  s->addTest(qa_64f_convert_32f_unaligned16::suite());
+  s->addTest(qa_8s_convert_16s_aligned16::suite());
+  s->addTest(qa_8s_convert_16s_unaligned16::suite());
+  s->addTest(qa_8s_convert_32f_aligned16::suite());
+  s->addTest(qa_8s_convert_32f_unaligned16::suite());
+  s->addTest(qa_32fc_32f_power_32fc_aligned16::suite());
+  s->addTest(qa_32f_power_aligned16::suite());
+  s->addTest(qa_32fc_atan2_32f_aligned16::suite());
+  s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite());
+  s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite());
+  s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite());
+  s->addTest(qa_32f_accumulator_aligned16::suite());
+  s->addTest(qa_32f_stddev_aligned16::suite());
+  s->addTest(qa_32f_stddev_and_mean_aligned16::suite());
+
+  return s;
+}