285 files changed, 2241 insertions, 11752 deletions
diff --git a/volk/Makefile.am b/volk/Makefile.am
index 4c6951ca7..03c5aac35 100644
--- a/volk/Makefile.am
+++ b/volk/Makefile.am
@@ -24,7 +24,12 @@ ACLOCAL_AMFLAGS = -I config
 include $(top_srcdir)/Makefile.common
 
 EXTRA_DIST = bootstrap configure config.h.in volk_config.h
-SUBDIRS = config include lib
+SUBDIRS = config
+if LV_HAVE_ORC
+SUBDIRS += orc
+endif
+SUBDIRS += include lib
+
 #if USE_PYTHON
 #SUBDIRS += python 
 #endif
@@ -53,4 +58,6 @@ distclean-local:
 	-rm -f include/Makefile.in
 	-rm -f lib/Makefile.in
 	-rm -f python/Makefile.in
-	-rm -f configure
-\ No newline at end of file
+	-rm -f configure
+	-rm -f orc/Makefile.in
+	-rm -f orc/*.c
diff --git a/volk/config.guess b/volk/config.guess
index 285237846..405bc3235 100755..120000
--- a/volk/config.guess
+++ b/volk/config.guess
@@ -1,1505 +1 @@
-#! /bin/sh
-# Attempt to guess a canonical system name.
-#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-#   Free Software Foundation, Inc.
-
-timestamp='2010-08-21'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Originally written by Per Bothner.  Please send patches (context
-# diff format) to <config-patches@gnu.org> and include a ChangeLog
-# entry.
-#
-# This script attempts to guess a canonical system name similar to
-# config.sub.  If it succeeds, it prints the system name on stdout, and
-# exits with 0.  Otherwise, it exits with 1.
-#
-# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION]
-
-Output the configuration name of the system \`$me' is run on.
-
-Operation modes:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.guess ($timestamp)
-
-Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
-Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-    * )
-       break ;;
-  esac
-done
-
-if test $# != 0; then
-  echo "$me: too many arguments$help" >&2
-  exit 1
-fi
-
-trap 'exit 1' HUP INT TERM
-
-# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
-# compiler to aid in system detection is discouraged as it requires
-# temporary files to be created and, as you can see below, it is a
-# headache to deal with in a portable fashion.
-
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
-
-# Portable tmp directory creation inspired by the Autoconf team.
-
-set_cc_for_build='
-trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
-trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" HUP INT PIPE TERM ;
-: ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
- { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
- { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
- { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
-dummy=$tmp/dummy ;
-tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
-case $CC_FOR_BUILD,$HOST_CC,$CC in
- ,,)    echo "int x;" > $dummy.c ;
-	for c in cc gcc c89 c99 ; do
-	  if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
-	     CC_FOR_BUILD="$c"; break ;
-	  fi ;
-	done ;
-	if test x"$CC_FOR_BUILD" = x ; then
-	  CC_FOR_BUILD=no_compiler_found ;
-	fi
-	;;
- ,,*)   CC_FOR_BUILD=$CC ;;
- ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
-esac ; set_cc_for_build= ;'
-
-# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi@noc.rutgers.edu 1994-08-24)
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
-	PATH=$PATH:/.attbin ; export PATH
-fi
-
-UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
-UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
-UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
-
-# Note: order is significant - the case branches are not exclusive.
-
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
-    *:NetBSD:*:*)
-	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
-	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
-	# switched to ELF, *-*-netbsd* would select the old
-	# object file format.  This provides both forward
-	# compatibility and a consistent mechanism for selecting the
-	# object file format.
-	#
-	# Note: NetBSD doesn't particularly care about the vendor
-	# portion of the name.  We always set it to "unknown".
-	sysctl="sysctl -n hw.machine_arch"
-	UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
-	    /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
-	case "${UNAME_MACHINE_ARCH}" in
-	    armeb) machine=armeb-unknown ;;
-	    arm*) machine=arm-unknown ;;
-	    sh3el) machine=shl-unknown ;;
-	    sh3eb) machine=sh-unknown ;;
-	    sh5el) machine=sh5le-unknown ;;
-	    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
-	esac
-	# The Operating System including object format, if it has switched
-	# to ELF recently, or will in the future.
-	case "${UNAME_MACHINE_ARCH}" in
-	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
-		eval $set_cc_for_build
-		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
-			| grep -q __ELF__
-		then
-		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
-		    # Return netbsd for either.  FIX?
-		    os=netbsd
-		else
-		    os=netbsdelf
-		fi
-		;;
-	    *)
-	        os=netbsd
-		;;
-	esac
-	# The OS release
-	# Debian GNU/NetBSD machines have a different userland, and
-	# thus, need a distinct triplet. However, they do not need
-	# kernel version information, so it can be replaced with a
-	# suitable tag, in the style of linux-gnu.
-	case "${UNAME_VERSION}" in
-	    Debian*)
-		release='-gnu'
-		;;
-	    *)
-		release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-		;;
-	esac
-	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
-	# contains redundant information, the shorter form:
-	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
-	echo "${machine}-${os}${release}"
-	exit ;;
-    *:OpenBSD:*:*)
-	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
-	echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
-	exit ;;
-    *:ekkoBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
-	exit ;;
-    *:SolidBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
-	exit ;;
-    macppc:MirBSD:*:*)
-	echo powerpc-unknown-mirbsd${UNAME_RELEASE}
-	exit ;;
-    *:MirBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
-	exit ;;
-    alpha:OSF1:*:*)
-	case $UNAME_RELEASE in
-	*4.0)
-		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
-		;;
-	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
-		;;
-	esac
-	# According to Compaq, /usr/sbin/psrinfo has been available on
-	# OSF/1 and Tru64 systems produced since 1995.  I hope that
-	# covers most systems running today.  This code pipes the CPU
-	# types through head -n 1, so we only detect the type of CPU 0.
-	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
-	case "$ALPHA_CPU_TYPE" in
-	    "EV4 (21064)")
-		UNAME_MACHINE="alpha" ;;
-	    "EV4.5 (21064)")
-		UNAME_MACHINE="alpha" ;;
-	    "LCA4 (21066/21068)")
-		UNAME_MACHINE="alpha" ;;
-	    "EV5 (21164)")
-		UNAME_MACHINE="alphaev5" ;;
-	    "EV5.6 (21164A)")
-		UNAME_MACHINE="alphaev56" ;;
-	    "EV5.6 (21164PC)")
-		UNAME_MACHINE="alphapca56" ;;
-	    "EV5.7 (21164PC)")
-		UNAME_MACHINE="alphapca57" ;;
-	    "EV6 (21264)")
-		UNAME_MACHINE="alphaev6" ;;
-	    "EV6.7 (21264A)")
-		UNAME_MACHINE="alphaev67" ;;
-	    "EV6.8CB (21264C)")
-		UNAME_MACHINE="alphaev68" ;;
-	    "EV6.8AL (21264B)")
-		UNAME_MACHINE="alphaev68" ;;
-	    "EV6.8CX (21264D)")
-		UNAME_MACHINE="alphaev68" ;;
-	    "EV6.9A (21264/EV69A)")
-		UNAME_MACHINE="alphaev69" ;;
-	    "EV7 (21364)")
-		UNAME_MACHINE="alphaev7" ;;
-	    "EV7.9 (21364A)")
-		UNAME_MACHINE="alphaev79" ;;
-	esac
-	# A Pn.n version is a patched version.
-	# A Vn.n version is a released version.
-	# A Tn.n version is a released field test version.
-	# A Xn.n version is an unreleased experimental baselevel.
-	# 1.2 uses "1.2" for uname -r.
-	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit ;;
-    Alpha\ *:Windows_NT*:*)
-	# How do we know it's Interix rather than the generic POSIX subsystem?
-	# Should we change UNAME_MACHINE based on the output of uname instead
-	# of the specific Alpha model?
-	echo alpha-pc-interix
-	exit ;;
-    21064:Windows_NT:50:3)
-	echo alpha-dec-winnt3.5
-	exit ;;
-    Amiga*:UNIX_System_V:4.0:*)
-	echo m68k-unknown-sysv4
-	exit ;;
-    *:[Aa]miga[Oo][Ss]:*:*)
-	echo ${UNAME_MACHINE}-unknown-amigaos
-	exit ;;
-    *:[Mm]orph[Oo][Ss]:*:*)
-	echo ${UNAME_MACHINE}-unknown-morphos
-	exit ;;
-    *:OS/390:*:*)
-	echo i370-ibm-openedition
-	exit ;;
-    *:z/VM:*:*)
-	echo s390-ibm-zvmoe
-	exit ;;
-    *:OS400:*:*)
-        echo powerpc-ibm-os400
-	exit ;;
-    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
-	echo arm-acorn-riscix${UNAME_RELEASE}
-	exit ;;
-    arm:riscos:*:*|arm:RISCOS:*:*)
-	echo arm-unknown-riscos
-	exit ;;
-    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
-	echo hppa1.1-hitachi-hiuxmpp
-	exit ;;
-    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
-	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
-	if test "`(/bin/universe) 2>/dev/null`" = att ; then
-		echo pyramid-pyramid-sysv3
-	else
-		echo pyramid-pyramid-bsd
-	fi
-	exit ;;
-    NILE*:*:*:dcosx)
-	echo pyramid-pyramid-svr4
-	exit ;;
-    DRS?6000:unix:4.0:6*)
-	echo sparc-icl-nx6
-	exit ;;
-    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
-	case `/usr/bin/uname -p` in
-	    sparc) echo sparc-icl-nx7; exit ;;
-	esac ;;
-    s390x:SunOS:*:*)
-	echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit ;;
-    sun4H:SunOS:5.*:*)
-	echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit ;;
-    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
-	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit ;;
-    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
-	echo i386-pc-auroraux${UNAME_RELEASE}
-	exit ;;
-    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
-	eval $set_cc_for_build
-	SUN_ARCH="i386"
-	# If there is a compiler, see if it is configured for 64-bit objects.
-	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
-	# This test works for both compilers.
-	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
-	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
-		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
-		grep IS_64BIT_ARCH >/dev/null
-	    then
-		SUN_ARCH="x86_64"
-	    fi
-	fi
-	echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit ;;
-    sun4*:SunOS:6*:*)
-	# According to config.sub, this is the proper way to canonicalize
-	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
-	# it's likely to be more like Solaris than SunOS4.
-	echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit ;;
-    sun4*:SunOS:*:*)
-	case "`/usr/bin/arch -k`" in
-	    Series*|S4*)
-		UNAME_RELEASE=`uname -v`
-		;;
-	esac
-	# Japanese Language versions have a version number like `4.1.3-JL'.
-	echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
-	exit ;;
-    sun3*:SunOS:*:*)
-	echo m68k-sun-sunos${UNAME_RELEASE}
-	exit ;;
-    sun*:*:4.2BSD:*)
-	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
-	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
-	case "`/bin/arch`" in
-	    sun3)
-		echo m68k-sun-sunos${UNAME_RELEASE}
-		;;
-	    sun4)
-		echo sparc-sun-sunos${UNAME_RELEASE}
-		;;
-	esac
-	exit ;;
-    aushp:SunOS:*:*)
-	echo sparc-auspex-sunos${UNAME_RELEASE}
-	exit ;;
-    # The situation for MiNT is a little confusing.  The machine name
-    # can be virtually everything (everything which is not
-    # "atarist" or "atariste" at least should have a processor
-    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
-    # to the lowercase version "mint" (or "freemint").  Finally
-    # the system name "TOS" denotes a system which is actually not
-    # MiNT.  But MiNT is downward compatible to TOS, so this should
-    # be no problem.
-    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
-	exit ;;
-    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
-	echo m68k-atari-mint${UNAME_RELEASE}
-        exit ;;
-    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
-	exit ;;
-    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit ;;
-    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit ;;
-    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit ;;
-    m68k:machten:*:*)
-	echo m68k-apple-machten${UNAME_RELEASE}
-	exit ;;
-    powerpc:machten:*:*)
-	echo powerpc-apple-machten${UNAME_RELEASE}
-	exit ;;
-    RISC*:Mach:*:*)
-	echo mips-dec-mach_bsd4.3
-	exit ;;
-    RISC*:ULTRIX:*:*)
-	echo mips-dec-ultrix${UNAME_RELEASE}
-	exit ;;
-    VAX*:ULTRIX*:*:*)
-	echo vax-dec-ultrix${UNAME_RELEASE}
-	exit ;;
-    2020:CLIX:*:* | 2430:CLIX:*:*)
-	echo clipper-intergraph-clix${UNAME_RELEASE}
-	exit ;;
-    mips:*:*:UMIPS | mips:*:*:RISCos)
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-#ifdef __cplusplus
-#include <stdio.h>  /* for printf() prototype */
-	int main (int argc, char *argv[]) {
-#else
-	int main (argc, argv) int argc; char *argv[]; {
-#endif
-	#if defined (host_mips) && defined (MIPSEB)
-	#if defined (SYSTYPE_SYSV)
-	  printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_SVR4)
-	  printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
-	  printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
-	#endif
-	#endif
-	  exit (-1);
-	}
-EOF
-	$CC_FOR_BUILD -o $dummy $dummy.c &&
-	  dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
-	  SYSTEM_NAME=`$dummy $dummyarg` &&
-	    { echo "$SYSTEM_NAME"; exit; }
-	echo mips-mips-riscos${UNAME_RELEASE}
-	exit ;;
-    Motorola:PowerMAX_OS:*:*)
-	echo powerpc-motorola-powermax
-	exit ;;
-    Motorola:*:4.3:PL8-*)
-	echo powerpc-harris-powermax
-	exit ;;
-    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
-	echo powerpc-harris-powermax
-	exit ;;
-    Night_Hawk:Power_UNIX:*:*)
-	echo powerpc-harris-powerunix
-	exit ;;
-    m88k:CX/UX:7*:*)
-	echo m88k-harris-cxux7
-	exit ;;
-    m88k:*:4*:R4*)
-	echo m88k-motorola-sysv4
-	exit ;;
-    m88k:*:3*:R3*)
-	echo m88k-motorola-sysv3
-	exit ;;
-    AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
-	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
-	then
-	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
-	       [ ${TARGET_BINARY_INTERFACE}x = x ]
-	    then
-		echo m88k-dg-dgux${UNAME_RELEASE}
-	    else
-		echo m88k-dg-dguxbcs${UNAME_RELEASE}
-	    fi
-	else
-	    echo i586-dg-dgux${UNAME_RELEASE}
-	fi
- 	exit ;;
-    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
-	echo m88k-dolphin-sysv3
-	exit ;;
-    M88*:*:R3*:*)
-	# Delta 88k system running SVR3
-	echo m88k-motorola-sysv3
-	exit ;;
-    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
-	echo m88k-tektronix-sysv3
-	exit ;;
-    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
-	echo m68k-tektronix-bsd
-	exit ;;
-    *:IRIX*:*:*)
-	echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
-	exit ;;
-    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
-	exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
-    i*86:AIX:*:*)
-	echo i386-ibm-aix
-	exit ;;
-    ia64:AIX:*:*)
-	if [ -x /usr/bin/oslevel ] ; then
-		IBM_REV=`/usr/bin/oslevel`
-	else
-		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
-	fi
-	echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
-	exit ;;
-    *:AIX:2:3)
-	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
-		eval $set_cc_for_build
-		sed 's/^		//' << EOF >$dummy.c
-		#include <sys/systemcfg.h>
-
-		main()
-			{
-			if (!__power_pc())
-				exit(1);
-			puts("powerpc-ibm-aix3.2.5");
-			exit(0);
-			}
-EOF
-		if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
-		then
-			echo "$SYSTEM_NAME"
-		else
-			echo rs6000-ibm-aix3.2.5
-		fi
-	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
-		echo rs6000-ibm-aix3.2.4
-	else
-		echo rs6000-ibm-aix3.2
-	fi
-	exit ;;
-    *:AIX:*:[4567])
-	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
-	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
-		IBM_ARCH=rs6000
-	else
-		IBM_ARCH=powerpc
-	fi
-	if [ -x /usr/bin/oslevel ] ; then
-		IBM_REV=`/usr/bin/oslevel`
-	else
-		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
-	fi
-	echo ${IBM_ARCH}-ibm-aix${IBM_REV}
-	exit ;;
-    *:AIX:*:*)
-	echo rs6000-ibm-aix
-	exit ;;
-    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
-	echo romp-ibm-bsd4.4
-	exit ;;
-    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
-	echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
-	exit ;;                             # report: romp-ibm BSD 4.3
-    *:BOSX:*:*)
-	echo rs6000-bull-bosx
-	exit ;;
-    DPX/2?00:B.O.S.:*:*)
-	echo m68k-bull-sysv3
-	exit ;;
-    9000/[34]??:4.3bsd:1.*:*)
-	echo m68k-hp-bsd
-	exit ;;
-    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
-	echo m68k-hp-bsd4.4
-	exit ;;
-    9000/[34678]??:HP-UX:*:*)
-	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
-	case "${UNAME_MACHINE}" in
-	    9000/31? )            HP_ARCH=m68000 ;;
-	    9000/[34]?? )         HP_ARCH=m68k ;;
-	    9000/[678][0-9][0-9])
-		if [ -x /usr/bin/getconf ]; then
-		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
-			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
-		fi
-		if [ "${HP_ARCH}" = "" ]; then
-		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
-
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
-
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
-
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
-EOF
-		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
-		    test -z "$HP_ARCH" && HP_ARCH=hppa
-		fi ;;
-	esac
-	if [ ${HP_ARCH} = "hppa2.0w" ]
-	then
-	    eval $set_cc_for_build
-
-	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
-	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
-	    # generating 64-bit code.  GNU and HP use different nomenclature:
-	    #
-	    # $ CC_FOR_BUILD=cc ./config.guess
-	    # => hppa2.0w-hp-hpux11.23
-	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
-	    # => hppa64-hp-hpux11.23
-
-	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
-		grep -q __LP64__
-	    then
-		HP_ARCH="hppa2.0w"
-	    else
-		HP_ARCH="hppa64"
-	    fi
-	fi
-	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
-	exit ;;
-    ia64:HP-UX:*:*)
-	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
-	echo ia64-hp-hpux${HPUX_REV}
-	exit ;;
-    3050*:HI-UX:*:*)
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#include <unistd.h>
-	int
-	main ()
-	{
-	  long cpu = sysconf (_SC_CPU_VERSION);
-	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
-	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
-	     results, however.  */
-	  if (CPU_IS_PA_RISC (cpu))
-	    {
-	      switch (cpu)
-		{
-		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
-		  default: puts ("hppa-hitachi-hiuxwe2"); break;
-		}
-	    }
-	  else if (CPU_IS_HP_MC68K (cpu))
-	    puts ("m68k-hitachi-hiuxwe2");
-	  else puts ("unknown-hitachi-hiuxwe2");
-	  exit (0);
-	}
-EOF
-	$CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
-		{ echo "$SYSTEM_NAME"; exit; }
-	echo unknown-hitachi-hiuxwe2
-	exit ;;
-    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
-	echo hppa1.1-hp-bsd
-	exit ;;
-    9000/8??:4.3bsd:*:*)
-	echo hppa1.0-hp-bsd
-	exit ;;
-    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
-	echo hppa1.0-hp-mpeix
-	exit ;;
-    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
-	echo hppa1.1-hp-osf
-	exit ;;
-    hp8??:OSF1:*:*)
-	echo hppa1.0-hp-osf
-	exit ;;
-    i*86:OSF1:*:*)
-	if [ -x /usr/sbin/sysversion ] ; then
-	    echo ${UNAME_MACHINE}-unknown-osf1mk
-	else
-	    echo ${UNAME_MACHINE}-unknown-osf1
-	fi
-	exit ;;
-    parisc*:Lites*:*:*)
-	echo hppa1.1-hp-lites
-	exit ;;
-    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
-	echo c1-convex-bsd
-        exit ;;
-    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
-	if getsysinfo -f scalar_acc
-	then echo c32-convex-bsd
-	else echo c2-convex-bsd
-	fi
-        exit ;;
-    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
-	echo c34-convex-bsd
-        exit ;;
-    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
-	echo c38-convex-bsd
-        exit ;;
-    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
-	echo c4-convex-bsd
-        exit ;;
-    CRAY*Y-MP:*:*:*)
-	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*[A-Z]90:*:*:*)
-	echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
-	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
-	      -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*TS:*:*:*)
-	echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*T3E:*:*:*)
-	echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*SV1:*:*:*)
-	echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    *:UNICOS/mp:*:*)
-	echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
-	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit ;;
-    5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-	exit ;;
-    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
-	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
-	exit ;;
-    sparc*:BSD/OS:*:*)
-	echo sparc-unknown-bsdi${UNAME_RELEASE}
-	exit ;;
-    *:BSD/OS:*:*)
-	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
-	exit ;;
-    *:FreeBSD:*:*)
-	case ${UNAME_MACHINE} in
-	    pc98)
-		echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
-	    amd64)
-		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
-	    *)
-		echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
-	esac
-	exit ;;
-    i*:CYGWIN*:*)
-	echo ${UNAME_MACHINE}-pc-cygwin
-	exit ;;
-    *:MINGW*:*)
-	echo ${UNAME_MACHINE}-pc-mingw32
-	exit ;;
-    i*:windows32*:*)
-    	# uname -m includes "-pc" on this system.
-    	echo ${UNAME_MACHINE}-mingw32
-	exit ;;
-    i*:PW*:*)
-	echo ${UNAME_MACHINE}-pc-pw32
-	exit ;;
-    *:Interix*:*)
-    	case ${UNAME_MACHINE} in
-	    x86)
-		echo i586-pc-interix${UNAME_RELEASE}
-		exit ;;
-	    authenticamd | genuineintel | EM64T)
-		echo x86_64-unknown-interix${UNAME_RELEASE}
-		exit ;;
-	    IA64)
-		echo ia64-unknown-interix${UNAME_RELEASE}
-		exit ;;
-	esac ;;
-    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
-	echo i${UNAME_MACHINE}-pc-mks
-	exit ;;
-    8664:Windows_NT:*)
-	echo x86_64-pc-mks
-	exit ;;
-    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
-	# How do we know it's Interix rather than the generic POSIX subsystem?
-	# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
-	# UNAME_MACHINE based on the output of uname instead of i386?
-	echo i586-pc-interix
-	exit ;;
-    i*:UWIN*:*)
-	echo ${UNAME_MACHINE}-pc-uwin
-	exit ;;
-    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
-	echo x86_64-unknown-cygwin
-	exit ;;
-    p*:CYGWIN*:*)
-	echo powerpcle-unknown-cygwin
-	exit ;;
-    prep*:SunOS:5.*:*)
-	echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit ;;
-    *:GNU:*:*)
-	# the GNU system
-	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
-	exit ;;
-    *:GNU/*:*:*)
-	# other systems with GNU libc and userland
-	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
-	exit ;;
-    i*86:Minix:*:*)
-	echo ${UNAME_MACHINE}-pc-minix
-	exit ;;
-    alpha:Linux:*:*)
-	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
-	  EV5)   UNAME_MACHINE=alphaev5 ;;
-	  EV56)  UNAME_MACHINE=alphaev56 ;;
-	  PCA56) UNAME_MACHINE=alphapca56 ;;
-	  PCA57) UNAME_MACHINE=alphapca56 ;;
-	  EV6)   UNAME_MACHINE=alphaev6 ;;
-	  EV67)  UNAME_MACHINE=alphaev67 ;;
-	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
-	objdump --private-headers /bin/sh | grep -q ld.so.1
-	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
-	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
-	exit ;;
-    arm*:Linux:*:*)
-	eval $set_cc_for_build
-	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
-	    | grep -q __ARM_EABI__
-	then
-	    echo ${UNAME_MACHINE}-unknown-linux-gnu
-	else
-	    echo ${UNAME_MACHINE}-unknown-linux-gnueabi
-	fi
-	exit ;;
-    avr32*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    cris:Linux:*:*)
-	echo cris-axis-linux-gnu
-	exit ;;
-    crisv32:Linux:*:*)
-	echo crisv32-axis-linux-gnu
-	exit ;;
-    frv:Linux:*:*)
-    	echo frv-unknown-linux-gnu
-	exit ;;
-    i*86:Linux:*:*)
-	LIBC=gnu
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#ifdef __dietlibc__
-	LIBC=dietlibc
-	#endif
-EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
-	echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
-	exit ;;
-    ia64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    m32r*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    m68*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    mips:Linux:*:* | mips64:Linux:*:*)
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#undef CPU
-	#undef ${UNAME_MACHINE}
-	#undef ${UNAME_MACHINE}el
-	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	CPU=${UNAME_MACHINE}el
-	#else
-	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	CPU=${UNAME_MACHINE}
-	#else
-	CPU=
-	#endif
-	#endif
-EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
-	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
-	;;
-    or32:Linux:*:*)
-	echo or32-unknown-linux-gnu
-	exit ;;
-    padre:Linux:*:*)
-	echo sparc-unknown-linux-gnu
-	exit ;;
-    parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-gnu
-	exit ;;
-    parisc:Linux:*:* | hppa:Linux:*:*)
-	# Look for CPU level
-	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
-	  PA7*) echo hppa1.1-unknown-linux-gnu ;;
-	  PA8*) echo hppa2.0-unknown-linux-gnu ;;
-	  *)    echo hppa-unknown-linux-gnu ;;
-	esac
-	exit ;;
-    ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-gnu
-	exit ;;
-    ppc:Linux:*:*)
-	echo powerpc-unknown-linux-gnu
-	exit ;;
-    s390:Linux:*:* | s390x:Linux:*:*)
-	echo ${UNAME_MACHINE}-ibm-linux
-	exit ;;
-    sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    sh*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    sparc:Linux:*:* | sparc64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    tile*:Linux:*:*)
-	echo ${UNAME_MACHINE}-tilera-linux-gnu
-	exit ;;
-    vax:Linux:*:*)
-	echo ${UNAME_MACHINE}-dec-linux-gnu
-	exit ;;
-    x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
-	exit ;;
-    xtensa*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit ;;
-    i*86:DYNIX/ptx:4*:*)
-	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
-	# earlier versions are messed up and put the nodename in both
-	# sysname and nodename.
-	echo i386-sequent-sysv4
-	exit ;;
-    i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
-	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
-	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
-	exit ;;
-    i*86:OS/2:*:*)
-	# If we were able to find `uname', then EMX Unix compatibility
-	# is probably installed.
-	echo ${UNAME_MACHINE}-pc-os2-emx
-	exit ;;
-    i*86:XTS-300:*:STOP)
-	echo ${UNAME_MACHINE}-unknown-stop
-	exit ;;
-    i*86:atheos:*:*)
-	echo ${UNAME_MACHINE}-unknown-atheos
-	exit ;;
-    i*86:syllable:*:*)
-	echo ${UNAME_MACHINE}-pc-syllable
-	exit ;;
-    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
-	echo i386-unknown-lynxos${UNAME_RELEASE}
-	exit ;;
-    i*86:*DOS:*:*)
-	echo ${UNAME_MACHINE}-pc-msdosdjgpp
-	exit ;;
-    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
-	UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
-	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
-		echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
-	else
-		echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
-	fi
-	exit ;;
-    i*86:*:5:[678]*)
-    	# UnixWare 7.x, OpenUNIX and OpenServer 6.
-	case `/bin/uname -X | grep "^Machine"` in
-	    *486*)	     UNAME_MACHINE=i486 ;;
-	    *Pentium)	     UNAME_MACHINE=i586 ;;
-	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
-	esac
-	echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
-	exit ;;
-    i*86:*:3.2:*)
-	if test -f /usr/options/cb.name; then
-		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
-		echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
-	elif /bin/uname -X 2>/dev/null >/dev/null ; then
-		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
-		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
-		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
-			&& UNAME_MACHINE=i586
-		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
-			&& UNAME_MACHINE=i686
-		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
-			&& UNAME_MACHINE=i686
-		echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
-	else
-		echo ${UNAME_MACHINE}-pc-sysv32
-	fi
-	exit ;;
-    pc:*:*:*)
-	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i586.
-	# Note: whatever this is, it MUST be the same as what config.sub
-	# prints for the "djgpp" host, or else GDB configury will decide that
-	# this is a cross-build.
-	echo i586-pc-msdosdjgpp
-        exit ;;
-    Intel:Mach:3*:*)
-	echo i386-pc-mach3
-	exit ;;
-    paragon:*:*:*)
-	echo i860-intel-osf1
-	exit ;;
-    i860:*:4.*:*) # i860-SVR4
-	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
-	  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
-	else # Add other i860-SVR4 vendors below as they are discovered.
-	  echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
-	fi
-	exit ;;
-    mini*:CTIX:SYS*5:*)
-	# "miniframe"
-	echo m68010-convergent-sysv
-	exit ;;
-    mc68k:UNIX:SYSTEM5:3.51m)
-	echo m68k-convergent-sysv
-	exit ;;
-    M680?0:D-NIX:5.3:*)
-	echo m68k-diab-dnix
-	exit ;;
-    M68*:*:R3V[5678]*:*)
-	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
-    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
-	OS_REL=''
-	test -r /etc/.relid \
-	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
-    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && { echo i486-ncr-sysv4; exit; } ;;
-    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
-	OS_REL='.3'
-	test -r /etc/.relid \
-	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	    && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	    && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
-	    && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
-    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
-	echo m68k-unknown-lynxos${UNAME_RELEASE}
-	exit ;;
-    mc68030:UNIX_System_V:4.*:*)
-	echo m68k-atari-sysv4
-	exit ;;
-    TSUNAMI:LynxOS:2.*:*)
-	echo sparc-unknown-lynxos${UNAME_RELEASE}
-	exit ;;
-    rs6000:LynxOS:2.*:*)
-	echo rs6000-unknown-lynxos${UNAME_RELEASE}
-	exit ;;
-    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
-	echo powerpc-unknown-lynxos${UNAME_RELEASE}
-	exit ;;
-    SM[BE]S:UNIX_SV:*:*)
-	echo mips-dde-sysv${UNAME_RELEASE}
-	exit ;;
-    RM*:ReliantUNIX-*:*:*)
-	echo mips-sni-sysv4
-	exit ;;
-    RM*:SINIX-*:*:*)
-	echo mips-sni-sysv4
-	exit ;;
-    *:SINIX-*:*:*)
-	if uname -p 2>/dev/null >/dev/null ; then
-		UNAME_MACHINE=`(uname -p) 2>/dev/null`
-		echo ${UNAME_MACHINE}-sni-sysv4
-	else
-		echo ns32k-sni-sysv
-	fi
-	exit ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel@ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit ;;
-    *:UNIX_System_V:4*:FTX*)
-	# From Gerald Hewes <hewes@openmarket.com>.
-	# How about differentiating between stratus architectures? -djm
-	echo hppa1.1-stratus-sysv4
-	exit ;;
-    *:*:*:FTX*)
-	# From seanf@swdc.stratus.com.
-	echo i860-stratus-sysv4
-	exit ;;
-    i*86:VOS:*:*)
-	# From Paul.Green@stratus.com.
-	echo ${UNAME_MACHINE}-stratus-vos
-	exit ;;
-    *:VOS:*:*)
-	# From Paul.Green@stratus.com.
-	echo hppa1.1-stratus-vos
-	exit ;;
-    mc68*:A/UX:*:*)
-	echo m68k-apple-aux${UNAME_RELEASE}
-	exit ;;
-    news*:NEWS-OS:6*:*)
-	echo mips-sony-newsos6
-	exit ;;
-    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
-	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
-	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
-	fi
-        exit ;;
-    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
-	echo powerpc-be-beos
-	exit ;;
-    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
-	echo powerpc-apple-beos
-	exit ;;
-    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
-	echo i586-pc-beos
-	exit ;;
-    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
-	echo i586-pc-haiku
-	exit ;;
-    SX-4:SUPER-UX:*:*)
-	echo sx4-nec-superux${UNAME_RELEASE}
-	exit ;;
-    SX-5:SUPER-UX:*:*)
-	echo sx5-nec-superux${UNAME_RELEASE}
-	exit ;;
-    SX-6:SUPER-UX:*:*)
-	echo sx6-nec-superux${UNAME_RELEASE}
-	exit ;;
-    SX-7:SUPER-UX:*:*)
-	echo sx7-nec-superux${UNAME_RELEASE}
-	exit ;;
-    SX-8:SUPER-UX:*:*)
-	echo sx8-nec-superux${UNAME_RELEASE}
-	exit ;;
-    SX-8R:SUPER-UX:*:*)
-	echo sx8r-nec-superux${UNAME_RELEASE}
-	exit ;;
-    Power*:Rhapsody:*:*)
-	echo powerpc-apple-rhapsody${UNAME_RELEASE}
-	exit ;;
-    *:Rhapsody:*:*)
-	echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
-	exit ;;
-    *:Darwin:*:*)
-	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
-	case $UNAME_PROCESSOR in
-	    i386)
-		eval $set_cc_for_build
-		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
-		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
-		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
-		      grep IS_64BIT_ARCH >/dev/null
-		  then
-		      UNAME_PROCESSOR="x86_64"
-		  fi
-		fi ;;
-	    unknown) UNAME_PROCESSOR=powerpc ;;
-	esac
-	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
-	exit ;;
-    *:procnto*:*:* | *:QNX:[0123456789]*:*)
-	UNAME_PROCESSOR=`uname -p`
-	if test "$UNAME_PROCESSOR" = "x86"; then
-		UNAME_PROCESSOR=i386
-		UNAME_MACHINE=pc
-	fi
-	echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
-	exit ;;
-    *:QNX:*:4*)
-	echo i386-pc-qnx
-	exit ;;
-    NSE-?:NONSTOP_KERNEL:*:*)
-	echo nse-tandem-nsk${UNAME_RELEASE}
-	exit ;;
-    NSR-?:NONSTOP_KERNEL:*:*)
-	echo nsr-tandem-nsk${UNAME_RELEASE}
-	exit ;;
-    *:NonStop-UX:*:*)
-	echo mips-compaq-nonstopux
-	exit ;;
-    BS2000:POSIX*:*:*)
-	echo bs2000-siemens-sysv
-	exit ;;
-    DS/*:UNIX_System_V:*:*)
-	echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
-	exit ;;
-    *:Plan9:*:*)
-	# "uname -m" is not consistent, so use $cputype instead. 386
-	# is converted to i386 for consistency with other x86
-	# operating systems.
-	if test "$cputype" = "386"; then
-	    UNAME_MACHINE=i386
-	else
-	    UNAME_MACHINE="$cputype"
-	fi
-	echo ${UNAME_MACHINE}-unknown-plan9
-	exit ;;
-    *:TOPS-10:*:*)
-	echo pdp10-unknown-tops10
-	exit ;;
-    *:TENEX:*:*)
-	echo pdp10-unknown-tenex
-	exit ;;
-    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
-	echo pdp10-dec-tops20
-	exit ;;
-    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
-	echo pdp10-xkl-tops20
-	exit ;;
-    *:TOPS-20:*:*)
-	echo pdp10-unknown-tops20
-	exit ;;
-    *:ITS:*:*)
-	echo pdp10-unknown-its
-	exit ;;
-    SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
-	exit ;;
-    *:DragonFly:*:*)
-	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
-	exit ;;
-    *:*VMS:*:*)
-    	UNAME_MACHINE=`(uname -p) 2>/dev/null`
-	case "${UNAME_MACHINE}" in
-	    A*) echo alpha-dec-vms ; exit ;;
-	    I*) echo ia64-dec-vms ; exit ;;
-	    V*) echo vax-dec-vms ; exit ;;
-	esac ;;
-    *:XENIX:*:SysV)
-	echo i386-pc-xenix
-	exit ;;
-    i*86:skyos:*:*)
-	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
-	exit ;;
-    i*86:rdos:*:*)
-	echo ${UNAME_MACHINE}-pc-rdos
-	exit ;;
-    i*86:AROS:*:*)
-	echo ${UNAME_MACHINE}-pc-aros
-	exit ;;
-esac
-
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
-eval $set_cc_for_build
-cat >$dummy.c <<EOF
-#ifdef _SEQUENT_
-# include <sys/types.h>
-# include <sys/utsname.h>
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
-  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
-     I don't know....  */
-  printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
-  printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
-          "4"
-#else
-	  ""
-#endif
-         ); exit (0);
-#endif
-#endif
-
-#if defined (__arm) && defined (__acorn) && defined (__unix)
-  printf ("arm-acorn-riscix\n"); exit (0);
-#endif
-
-#if defined (hp300) && !defined (hpux)
-  printf ("m68k-hp-bsd\n"); exit (0);
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
-  int version;
-  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
-  if (version < 4)
-    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
-  else
-    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
-  exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
-  printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
-  printf ("ns32k-encore-mach\n"); exit (0);
-#else
-  printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
-  printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
-  printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
-  printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
-    struct utsname un;
-
-    uname(&un);
-
-    if (strncmp(un.version, "V2", 2) == 0) {
-	printf ("i386-sequent-ptx2\n"); exit (0);
-    }
-    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
-	printf ("i386-sequent-ptx1\n"); exit (0);
-    }
-    printf ("i386-sequent-ptx\n"); exit (0);
-
-#endif
-
-#if defined (vax)
-# if !defined (ultrix)
-#  include <sys/param.h>
-#  if defined (BSD)
-#   if BSD == 43
-      printf ("vax-dec-bsd4.3\n"); exit (0);
-#   else
-#    if BSD == 199006
-      printf ("vax-dec-bsd4.3reno\n"); exit (0);
-#    else
-      printf ("vax-dec-bsd\n"); exit (0);
-#    endif
-#   endif
-#  else
-    printf ("vax-dec-bsd\n"); exit (0);
-#  endif
-# else
-    printf ("vax-dec-ultrix\n"); exit (0);
-# endif
-#endif
-
-#if defined (alliant) && defined (i860)
-  printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
-  exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
-	{ echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
-
-# Convex versions that predate uname can use getsysinfo(1)
-
-if [ -x /usr/convex/getsysinfo ]
-then
-    case `getsysinfo -f cpu_type` in
-    c1*)
-	echo c1-convex-bsd
-	exit ;;
-    c2*)
-	if getsysinfo -f scalar_acc
-	then echo c32-convex-bsd
-	else echo c2-convex-bsd
-	fi
-	exit ;;
-    c34*)
-	echo c34-convex-bsd
-	exit ;;
-    c38*)
-	echo c38-convex-bsd
-	exit ;;
-    c4*)
-	echo c4-convex-bsd
-	exit ;;
-    esac
-fi
-
-cat >&2 <<EOF
-$0: unable to guess system type
-
-This script, last modified $timestamp, has failed to recognize
-the operating system you are using. It is advised that you
-download the most up to date version of the config scripts from
-
-  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
-and
-  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
-
-If the version you run ($0) is already up to date, please
-send the following data and any information you think might be
-pertinent to <config-patches@gnu.org> in order to provide the needed
-information to handle your system.
-
-config.guess timestamp = $timestamp
-
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
-/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
-
-hostinfo               = `(hostinfo) 2>/dev/null`
-/bin/universe          = `(/bin/universe) 2>/dev/null`
-/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
-/bin/arch              = `(/bin/arch) 2>/dev/null`
-/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
-
-UNAME_MACHINE = ${UNAME_MACHINE}
-UNAME_RELEASE = ${UNAME_RELEASE}
-UNAME_SYSTEM  = ${UNAME_SYSTEM}
-UNAME_VERSION = ${UNAME_VERSION}
-EOF
-
-exit 1
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
+/usr/share/automake-1.11/config.guess
+\ No newline at end of file
diff --git a/volk/config.sub b/volk/config.sub
index 320e30388..4d47fbcbc 100755..120000
--- a/volk/config.sub
+++ b/volk/config.sub
@@ -1,1739 +1 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-#   Free Software Foundation, Inc.
-
-timestamp='2010-09-11'
-
-# This file is (in principle) common to ALL GNU software.
-# The presence of a machine in this file suggests that SOME GNU software
-# can handle that machine.  It does not imply ALL GNU software can.
-#
-# This file is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Please send patches to <config-patches@gnu.org>.  Submit a context
-# diff and a properly formatted GNU ChangeLog entry.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support.  The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS
-       $0 [OPTION] ALIAS
-
-Canonicalize a configuration name.
-
-Operation modes:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
-Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help"
-       exit 1 ;;
-
-    *local*)
-       # First pass through any local machine types.
-       echo $1
-       exit ;;
-
-    * )
-       break ;;
-  esac
-done
-
-case $# in
- 0) echo "$me: missing argument$help" >&2
-    exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
-    exit 1;;
-esac
-
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
-  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
-  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
-  knetbsd*-gnu* | netbsd*-gnu* | \
-  kopensolaris*-gnu* | \
-  storm-chaos* | os2-emx* | rtmk-nova*)
-    os=-$maybe_os
-    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
-    ;;
-  *)
-    basic_machine=`echo $1 | sed 's/-[^-]*$//'`
-    if [ $basic_machine != $1 ]
-    then os=`echo $1 | sed 's/.*-/-/'`
-    else os=; fi
-    ;;
-esac
-
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work.  We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-case $os in
-	-sun*os*)
-		# Prevent following clause from handling this invalid input.
-		;;
-	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
-	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
-	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
-	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-	-apple | -axis | -knuth | -cray | -microblaze)
-		os=
-		basic_machine=$1
-		;;
-        -bluegene*)
-	        os=-cnk
-		;;
-	-sim | -cisco | -oki | -wec | -winbond)
-		os=
-		basic_machine=$1
-		;;
-	-scout)
-		;;
-	-wrs)
-		os=-vxworks
-		basic_machine=$1
-		;;
-	-chorusos*)
-		os=-chorusos
-		basic_machine=$1
-		;;
- 	-chorusrdb)
- 		os=-chorusrdb
-		basic_machine=$1
- 		;;
-	-hiux*)
-		os=-hiuxwe2
-		;;
-	-sco6)
-		os=-sco5v6
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco5)
-		os=-sco3.2v5
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco4)
-		os=-sco3.2v4
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco3.2.[4-9]*)
-		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco3.2v[4-9]*)
-		# Don't forget version if it is 3.2v4 or newer.
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco5v6*)
-		# Don't forget version if it is 3.2v4 or newer.
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco*)
-		os=-sco3.2v2
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-udk*)
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-isc)
-		os=-isc2.2
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-clix*)
-		basic_machine=clipper-intergraph
-		;;
-	-isc*)
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-lynx*)
-		os=-lynxos
-		;;
-	-ptx*)
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
-		;;
-	-windowsnt*)
-		os=`echo $os | sed -e 's/windowsnt/winnt/'`
-		;;
-	-psos*)
-		os=-psos
-		;;
-	-mint | -mint[0-9]*)
-		basic_machine=m68k-atari
-		os=-mint
-		;;
-esac
-
-# Decode aliases for certain CPU-COMPANY combinations.
-case $basic_machine in
-	# Recognize the basic CPU types without company name.
-	# Some are omitted here because they have special meanings below.
-	1750a | 580 \
-	| a29k \
-	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
-	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
-	| am33_2.0 \
-	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
-	| bfin \
-	| c4x | clipper \
-	| d10v | d30v | dlx | dsp16xx \
-	| fido | fr30 | frv \
-	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
-	| i370 | i860 | i960 | ia64 \
-	| ip2k | iq2000 \
-	| lm32 \
-	| m32c | m32r | m32rle | m68000 | m68k | m88k \
-	| maxq | mb | microblaze | mcore | mep | metag \
-	| mips | mipsbe | mipseb | mipsel | mipsle \
-	| mips16 \
-	| mips64 | mips64el \
-	| mips64octeon | mips64octeonel \
-	| mips64orion | mips64orionel \
-	| mips64r5900 | mips64r5900el \
-	| mips64vr | mips64vrel \
-	| mips64vr4100 | mips64vr4100el \
-	| mips64vr4300 | mips64vr4300el \
-	| mips64vr5000 | mips64vr5000el \
-	| mips64vr5900 | mips64vr5900el \
-	| mipsisa32 | mipsisa32el \
-	| mipsisa32r2 | mipsisa32r2el \
-	| mipsisa64 | mipsisa64el \
-	| mipsisa64r2 | mipsisa64r2el \
-	| mipsisa64sb1 | mipsisa64sb1el \
-	| mipsisa64sr71k | mipsisa64sr71kel \
-	| mipstx39 | mipstx39el \
-	| mn10200 | mn10300 \
-	| moxie \
-	| mt \
-	| msp430 \
-	| nds32 | nds32le | nds32be \
-	| nios | nios2 \
-	| ns16k | ns32k \
-	| or32 \
-	| pdp10 | pdp11 | pj | pjl \
-	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
-	| pyramid \
-	| rx \
-	| score \
-	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
-	| sh64 | sh64le \
-	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
-	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
-	| spu | strongarm \
-	| tahoe | thumb | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
-	| ubicom32 \
-	| v850 | v850e \
-	| we32k \
-	| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
-	| z8k | z80)
-		basic_machine=$basic_machine-unknown
-		;;
-	c54x)
-		basic_machine=tic54x-unknown
-		;;
-	c55x)
-		basic_machine=tic55x-unknown
-		;;
-	c6x)
-		basic_machine=tic6x-unknown
-		;;
-	m6811 | m68hc11 | m6812 | m68hc12 | picochip)
-		# Motorola 68HC11/12.
-		basic_machine=$basic_machine-unknown
-		os=-none
-		;;
-	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
-		;;
-	ms1)
-		basic_machine=mt-unknown
-		;;
-
-	# We use `pc' rather than `unknown'
-	# because (1) that's what they normally are, and
-	# (2) the word "unknown" tends to confuse beginning users.
-	i*86 | x86_64)
-	  basic_machine=$basic_machine-pc
-	  ;;
-	# Object if more than one company name word.
-	*-*-*)
-		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
-		exit 1
-		;;
-	# Recognize the basic CPU types with company name.
-	580-* \
-	| a29k-* \
-	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
-	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
-	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
-	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
-	| avr-* | avr32-* \
-	| bfin-* | bs2000-* \
-	| c[123]* | c30-* | [cjt]90-* | c4x-* \
-	| clipper-* | craynv-* | cydra-* \
-	| d10v-* | d30v-* | dlx-* \
-	| elxsi-* \
-	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
-	| h8300-* | h8500-* \
-	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
-	| i*86-* | i860-* | i960-* | ia64-* \
-	| ip2k-* | iq2000-* \
-	| lm32-* \
-	| m32c-* | m32r-* | m32rle-* \
-	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
-	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
-	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
-	| mips16-* \
-	| mips64-* | mips64el-* \
-	| mips64octeon-* | mips64octeonel-* \
-	| mips64orion-* | mips64orionel-* \
-	| mips64r5900-* | mips64r5900el-* \
-	| mips64vr-* | mips64vrel-* \
-	| mips64vr4100-* | mips64vr4100el-* \
-	| mips64vr4300-* | mips64vr4300el-* \
-	| mips64vr5000-* | mips64vr5000el-* \
-	| mips64vr5900-* | mips64vr5900el-* \
-	| mipsisa32-* | mipsisa32el-* \
-	| mipsisa32r2-* | mipsisa32r2el-* \
-	| mipsisa64-* | mipsisa64el-* \
-	| mipsisa64r2-* | mipsisa64r2el-* \
-	| mipsisa64sb1-* | mipsisa64sb1el-* \
-	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
-	| mipstx39-* | mipstx39el-* \
-	| mmix-* \
-	| mt-* \
-	| msp430-* \
-	| nds32-* | nds32le-* | nds32be-* \
-	| nios-* | nios2-* \
-	| none-* | np1-* | ns16k-* | ns32k-* \
-	| orion-* \
-	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
-	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
-	| pyramid-* \
-	| romp-* | rs6000-* | rx-* \
-	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
-	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
-	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
-	| sparclite-* \
-	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
-	| tahoe-* | thumb-* \
-	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
-	| tile-* | tilegx-* \
-	| tron-* \
-	| ubicom32-* \
-	| v850-* | v850e-* | vax-* \
-	| we32k-* \
-	| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
-	| xstormy16-* | xtensa*-* \
-	| ymp-* \
-	| z8k-* | z80-*)
-		;;
-	# Recognize the basic CPU types without company name, with glob match.
-	xtensa*)
-		basic_machine=$basic_machine-unknown
-		;;
-	# Recognize the various machine names and aliases which stand
-	# for a CPU type and a company and sometimes even an OS.
-	386bsd)
-		basic_machine=i386-unknown
-		os=-bsd
-		;;
-	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
-		basic_machine=m68000-att
-		;;
-	3b*)
-		basic_machine=we32k-att
-		;;
-	a29khif)
-		basic_machine=a29k-amd
-		os=-udi
-		;;
-    	abacus)
-		basic_machine=abacus-unknown
-		;;
-	adobe68k)
-		basic_machine=m68010-adobe
-		os=-scout
-		;;
-	alliant | fx80)
-		basic_machine=fx80-alliant
-		;;
-	altos | altos3068)
-		basic_machine=m68k-altos
-		;;
-	am29k)
-		basic_machine=a29k-none
-		os=-bsd
-		;;
-	amd64)
-		basic_machine=x86_64-pc
-		;;
-	amd64-*)
-		basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	amdahl)
-		basic_machine=580-amdahl
-		os=-sysv
-		;;
-	amiga | amiga-*)
-		basic_machine=m68k-unknown
-		;;
-	amigaos | amigados)
-		basic_machine=m68k-unknown
-		os=-amigaos
-		;;
-	amigaunix | amix)
-		basic_machine=m68k-unknown
-		os=-sysv4
-		;;
-	apollo68)
-		basic_machine=m68k-apollo
-		os=-sysv
-		;;
-	apollo68bsd)
-		basic_machine=m68k-apollo
-		os=-bsd
-		;;
-	aros)
-		basic_machine=i386-pc
-		os=-aros
-		;;
-	aux)
-		basic_machine=m68k-apple
-		os=-aux
-		;;
-	balance)
-		basic_machine=ns32k-sequent
-		os=-dynix
-		;;
-	blackfin)
-		basic_machine=bfin-unknown
-		os=-linux
-		;;
-	blackfin-*)
-		basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
-		os=-linux
-		;;
-	bluegene*)
-		basic_machine=powerpc-ibm
-		os=-cnk
-		;;
-	c54x-*)
-		basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	c55x-*)
-		basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	c6x-*)
-		basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	c90)
-		basic_machine=c90-cray
-		os=-unicos
-		;;
-        cegcc)
-		basic_machine=arm-unknown
-		os=-cegcc
-		;;
-	convex-c1)
-		basic_machine=c1-convex
-		os=-bsd
-		;;
-	convex-c2)
-		basic_machine=c2-convex
-		os=-bsd
-		;;
-	convex-c32)
-		basic_machine=c32-convex
-		os=-bsd
-		;;
-	convex-c34)
-		basic_machine=c34-convex
-		os=-bsd
-		;;
-	convex-c38)
-		basic_machine=c38-convex
-		os=-bsd
-		;;
-	cray | j90)
-		basic_machine=j90-cray
-		os=-unicos
-		;;
-	craynv)
-		basic_machine=craynv-cray
-		os=-unicosmp
-		;;
-	cr16)
-		basic_machine=cr16-unknown
-		os=-elf
-		;;
-	crds | unos)
-		basic_machine=m68k-crds
-		;;
-	crisv32 | crisv32-* | etraxfs*)
-		basic_machine=crisv32-axis
-		;;
-	cris | cris-* | etrax*)
-		basic_machine=cris-axis
-		;;
-	crx)
-		basic_machine=crx-unknown
-		os=-elf
-		;;
-	da30 | da30-*)
-		basic_machine=m68k-da30
-		;;
-	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
-		basic_machine=mips-dec
-		;;
-	decsystem10* | dec10*)
-		basic_machine=pdp10-dec
-		os=-tops10
-		;;
-	decsystem20* | dec20*)
-		basic_machine=pdp10-dec
-		os=-tops20
-		;;
-	delta | 3300 | motorola-3300 | motorola-delta \
-	      | 3300-motorola | delta-motorola)
-		basic_machine=m68k-motorola
-		;;
-	delta88)
-		basic_machine=m88k-motorola
-		os=-sysv3
-		;;
-	dicos)
-		basic_machine=i686-pc
-		os=-dicos
-		;;
-	djgpp)
-		basic_machine=i586-pc
-		os=-msdosdjgpp
-		;;
-	dpx20 | dpx20-*)
-		basic_machine=rs6000-bull
-		os=-bosx
-		;;
-	dpx2* | dpx2*-bull)
-		basic_machine=m68k-bull
-		os=-sysv3
-		;;
-	ebmon29k)
-		basic_machine=a29k-amd
-		os=-ebmon
-		;;
-	elxsi)
-		basic_machine=elxsi-elxsi
-		os=-bsd
-		;;
-	encore | umax | mmax)
-		basic_machine=ns32k-encore
-		;;
-	es1800 | OSE68k | ose68k | ose | OSE)
-		basic_machine=m68k-ericsson
-		os=-ose
-		;;
-	fx2800)
-		basic_machine=i860-alliant
-		;;
-	genix)
-		basic_machine=ns32k-ns
-		;;
-	gmicro)
-		basic_machine=tron-gmicro
-		os=-sysv
-		;;
-	go32)
-		basic_machine=i386-pc
-		os=-go32
-		;;
-	h3050r* | hiux*)
-		basic_machine=hppa1.1-hitachi
-		os=-hiuxwe2
-		;;
-	h8300hms)
-		basic_machine=h8300-hitachi
-		os=-hms
-		;;
-	h8300xray)
-		basic_machine=h8300-hitachi
-		os=-xray
-		;;
-	h8500hms)
-		basic_machine=h8500-hitachi
-		os=-hms
-		;;
-	harris)
-		basic_machine=m88k-harris
-		os=-sysv3
-		;;
-	hp300-*)
-		basic_machine=m68k-hp
-		;;
-	hp300bsd)
-		basic_machine=m68k-hp
-		os=-bsd
-		;;
-	hp300hpux)
-		basic_machine=m68k-hp
-		os=-hpux
-		;;
-	hp3k9[0-9][0-9] | hp9[0-9][0-9])
-		basic_machine=hppa1.0-hp
-		;;
-	hp9k2[0-9][0-9] | hp9k31[0-9])
-		basic_machine=m68000-hp
-		;;
-	hp9k3[2-9][0-9])
-		basic_machine=m68k-hp
-		;;
-	hp9k6[0-9][0-9] | hp6[0-9][0-9])
-		basic_machine=hppa1.0-hp
-		;;
-	hp9k7[0-79][0-9] | hp7[0-79][0-9])
-		basic_machine=hppa1.1-hp
-		;;
-	hp9k78[0-9] | hp78[0-9])
-		# FIXME: really hppa2.0-hp
-		basic_machine=hppa1.1-hp
-		;;
-	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
-		# FIXME: really hppa2.0-hp
-		basic_machine=hppa1.1-hp
-		;;
-	hp9k8[0-9][13679] | hp8[0-9][13679])
-		basic_machine=hppa1.1-hp
-		;;
-	hp9k8[0-9][0-9] | hp8[0-9][0-9])
-		basic_machine=hppa1.0-hp
-		;;
-	hppa-next)
-		os=-nextstep3
-		;;
-	hppaosf)
-		basic_machine=hppa1.1-hp
-		os=-osf
-		;;
-	hppro)
-		basic_machine=hppa1.1-hp
-		os=-proelf
-		;;
-	i370-ibm* | ibm*)
-		basic_machine=i370-ibm
-		;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
-	i*86v32)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-sysv32
-		;;
-	i*86v4*)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-sysv4
-		;;
-	i*86v)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-sysv
-		;;
-	i*86sol2)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-solaris2
-		;;
-	i386mach)
-		basic_machine=i386-mach
-		os=-mach
-		;;
-	i386-vsta | vsta)
-		basic_machine=i386-unknown
-		os=-vsta
-		;;
-	iris | iris4d)
-		basic_machine=mips-sgi
-		case $os in
-		    -irix*)
-			;;
-		    *)
-			os=-irix4
-			;;
-		esac
-		;;
-	isi68 | isi)
-		basic_machine=m68k-isi
-		os=-sysv
-		;;
-	m68knommu)
-		basic_machine=m68k-unknown
-		os=-linux
-		;;
-	m68knommu-*)
-		basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
-		os=-linux
-		;;
-	m88k-omron*)
-		basic_machine=m88k-omron
-		;;
-	magnum | m3230)
-		basic_machine=mips-mips
-		os=-sysv
-		;;
-	merlin)
-		basic_machine=ns32k-utek
-		os=-sysv
-		;;
-        microblaze)
-		basic_machine=microblaze-xilinx
-		;;
-	mingw32)
-		basic_machine=i386-pc
-		os=-mingw32
-		;;
-	mingw32ce)
-		basic_machine=arm-unknown
-		os=-mingw32ce
-		;;
-	miniframe)
-		basic_machine=m68000-convergent
-		;;
-	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
-		basic_machine=m68k-atari
-		os=-mint
-		;;
-	mips3*-*)
-		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
-		;;
-	mips3*)
-		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
-		;;
-	monitor)
-		basic_machine=m68k-rom68k
-		os=-coff
-		;;
-	morphos)
-		basic_machine=powerpc-unknown
-		os=-morphos
-		;;
-	msdos)
-		basic_machine=i386-pc
-		os=-msdos
-		;;
-	ms1-*)
-		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
-		;;
-	mvs)
-		basic_machine=i370-ibm
-		os=-mvs
-		;;
-	ncr3000)
-		basic_machine=i486-ncr
-		os=-sysv4
-		;;
-	netbsd386)
-		basic_machine=i386-unknown
-		os=-netbsd
-		;;
-	netwinder)
-		basic_machine=armv4l-rebel
-		os=-linux
-		;;
-	news | news700 | news800 | news900)
-		basic_machine=m68k-sony
-		os=-newsos
-		;;
-	news1000)
-		basic_machine=m68030-sony
-		os=-newsos
-		;;
-	news-3600 | risc-news)
-		basic_machine=mips-sony
-		os=-newsos
-		;;
-	necv70)
-		basic_machine=v70-nec
-		os=-sysv
-		;;
-	next | m*-next )
-		basic_machine=m68k-next
-		case $os in
-		    -nextstep* )
-			;;
-		    -ns2*)
-		      os=-nextstep2
-			;;
-		    *)
-		      os=-nextstep3
-			;;
-		esac
-		;;
-	nh3000)
-		basic_machine=m68k-harris
-		os=-cxux
-		;;
-	nh[45]000)
-		basic_machine=m88k-harris
-		os=-cxux
-		;;
-	nindy960)
-		basic_machine=i960-intel
-		os=-nindy
-		;;
-	mon960)
-		basic_machine=i960-intel
-		os=-mon960
-		;;
-	nonstopux)
-		basic_machine=mips-compaq
-		os=-nonstopux
-		;;
-	np1)
-		basic_machine=np1-gould
-		;;
-        neo-tandem)
-		basic_machine=neo-tandem
-		;;
-        nse-tandem)
-		basic_machine=nse-tandem
-		;;
-	nsr-tandem)
-		basic_machine=nsr-tandem
-		;;
-	op50n-* | op60c-*)
-		basic_machine=hppa1.1-oki
-		os=-proelf
-		;;
-	openrisc | openrisc-*)
-		basic_machine=or32-unknown
-		;;
-	os400)
-		basic_machine=powerpc-ibm
-		os=-os400
-		;;
-	OSE68000 | ose68000)
-		basic_machine=m68000-ericsson
-		os=-ose
-		;;
-	os68k)
-		basic_machine=m68k-none
-		os=-os68k
-		;;
-	pa-hitachi)
-		basic_machine=hppa1.1-hitachi
-		os=-hiuxwe2
-		;;
-	paragon)
-		basic_machine=i860-intel
-		os=-osf
-		;;
-	parisc)
-		basic_machine=hppa-unknown
-		os=-linux
-		;;
-	parisc-*)
-		basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
-		os=-linux
-		;;
-	pbd)
-		basic_machine=sparc-tti
-		;;
-	pbb)
-		basic_machine=m68k-tti
-		;;
-	pc532 | pc532-*)
-		basic_machine=ns32k-pc532
-		;;
-	pc98)
-		basic_machine=i386-pc
-		;;
-	pc98-*)
-		basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	pentium | p5 | k5 | k6 | nexgen | viac3)
-		basic_machine=i586-pc
-		;;
-	pentiumpro | p6 | 6x86 | athlon | athlon_*)
-		basic_machine=i686-pc
-		;;
-	pentiumii | pentium2 | pentiumiii | pentium3)
-		basic_machine=i686-pc
-		;;
-	pentium4)
-		basic_machine=i786-pc
-		;;
-	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
-		basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	pentiumpro-* | p6-* | 6x86-* | athlon-*)
-		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
-		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	pentium4-*)
-		basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	pn)
-		basic_machine=pn-gould
-		;;
-	power)	basic_machine=power-ibm
-		;;
-	ppc)	basic_machine=powerpc-unknown
-		;;
-	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	ppcle | powerpclittle | ppc-le | powerpc-little)
-		basic_machine=powerpcle-unknown
-		;;
-	ppcle-* | powerpclittle-*)
-		basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	ppc64)	basic_machine=powerpc64-unknown
-		;;
-	ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	ppc64le | powerpc64little | ppc64-le | powerpc64-little)
-		basic_machine=powerpc64le-unknown
-		;;
-	ppc64le-* | powerpc64little-*)
-		basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	ps2)
-		basic_machine=i386-ibm
-		;;
-	pw32)
-		basic_machine=i586-unknown
-		os=-pw32
-		;;
-	rdos)
-		basic_machine=i386-pc
-		os=-rdos
-		;;
-	rom68k)
-		basic_machine=m68k-rom68k
-		os=-coff
-		;;
-	rm[46]00)
-		basic_machine=mips-siemens
-		;;
-	rtpc | rtpc-*)
-		basic_machine=romp-ibm
-		;;
-	s390 | s390-*)
-		basic_machine=s390-ibm
-		;;
-	s390x | s390x-*)
-		basic_machine=s390x-ibm
-		;;
-	sa29200)
-		basic_machine=a29k-amd
-		os=-udi
-		;;
-	sb1)
-		basic_machine=mipsisa64sb1-unknown
-		;;
-	sb1el)
-		basic_machine=mipsisa64sb1el-unknown
-		;;
-	sde)
-		basic_machine=mipsisa32-sde
-		os=-elf
-		;;
-	sei)
-		basic_machine=mips-sei
-		os=-seiux
-		;;
-	sequent)
-		basic_machine=i386-sequent
-		;;
-	sh)
-		basic_machine=sh-hitachi
-		os=-hms
-		;;
-	sh5el)
-		basic_machine=sh5le-unknown
-		;;
-	sh64)
-		basic_machine=sh64-unknown
-		;;
-	sparclite-wrs | simso-wrs)
-		basic_machine=sparclite-wrs
-		os=-vxworks
-		;;
-	sps7)
-		basic_machine=m68k-bull
-		os=-sysv2
-		;;
-	spur)
-		basic_machine=spur-unknown
-		;;
-	st2000)
-		basic_machine=m68k-tandem
-		;;
-	stratus)
-		basic_machine=i860-stratus
-		os=-sysv4
-		;;
-	sun2)
-		basic_machine=m68000-sun
-		;;
-	sun2os3)
-		basic_machine=m68000-sun
-		os=-sunos3
-		;;
-	sun2os4)
-		basic_machine=m68000-sun
-		os=-sunos4
-		;;
-	sun3os3)
-		basic_machine=m68k-sun
-		os=-sunos3
-		;;
-	sun3os4)
-		basic_machine=m68k-sun
-		os=-sunos4
-		;;
-	sun4os3)
-		basic_machine=sparc-sun
-		os=-sunos3
-		;;
-	sun4os4)
-		basic_machine=sparc-sun
-		os=-sunos4
-		;;
-	sun4sol2)
-		basic_machine=sparc-sun
-		os=-solaris2
-		;;
-	sun3 | sun3-*)
-		basic_machine=m68k-sun
-		;;
-	sun4)
-		basic_machine=sparc-sun
-		;;
-	sun386 | sun386i | roadrunner)
-		basic_machine=i386-sun
-		;;
-	sv1)
-		basic_machine=sv1-cray
-		os=-unicos
-		;;
-	symmetry)
-		basic_machine=i386-sequent
-		os=-dynix
-		;;
-	t3e)
-		basic_machine=alphaev5-cray
-		os=-unicos
-		;;
-	t90)
-		basic_machine=t90-cray
-		os=-unicos
-		;;
-        # This must be matched before tile*.
-        tilegx*)
-		basic_machine=tilegx-unknown
-		os=-linux-gnu
-		;;
-	tile*)
-		basic_machine=tile-unknown
-		os=-linux-gnu
-		;;
-	tx39)
-		basic_machine=mipstx39-unknown
-		;;
-	tx39el)
-		basic_machine=mipstx39el-unknown
-		;;
-	toad1)
-		basic_machine=pdp10-xkl
-		os=-tops20
-		;;
-	tower | tower-32)
-		basic_machine=m68k-ncr
-		;;
-	tpf)
-		basic_machine=s390x-ibm
-		os=-tpf
-		;;
-	udi29k)
-		basic_machine=a29k-amd
-		os=-udi
-		;;
-	ultra3)
-		basic_machine=a29k-nyu
-		os=-sym1
-		;;
-	v810 | necv810)
-		basic_machine=v810-nec
-		os=-none
-		;;
-	vaxv)
-		basic_machine=vax-dec
-		os=-sysv
-		;;
-	vms)
-		basic_machine=vax-dec
-		os=-vms
-		;;
-	vpp*|vx|vx-*)
-		basic_machine=f301-fujitsu
-		;;
-	vxworks960)
-		basic_machine=i960-wrs
-		os=-vxworks
-		;;
-	vxworks68)
-		basic_machine=m68k-wrs
-		os=-vxworks
-		;;
-	vxworks29k)
-		basic_machine=a29k-wrs
-		os=-vxworks
-		;;
-	w65*)
-		basic_machine=w65-wdc
-		os=-none
-		;;
-	w89k-*)
-		basic_machine=hppa1.1-winbond
-		os=-proelf
-		;;
-	xbox)
-		basic_machine=i686-pc
-		os=-mingw32
-		;;
-	xps | xps100)
-		basic_machine=xps100-honeywell
-		;;
-	ymp)
-		basic_machine=ymp-cray
-		os=-unicos
-		;;
-	z8k-*-coff)
-		basic_machine=z8k-unknown
-		os=-sim
-		;;
-	z80-*-coff)
-		basic_machine=z80-unknown
-		os=-sim
-		;;
-	none)
-		basic_machine=none-none
-		os=-none
-		;;
-
-# Here we handle the default manufacturer of certain CPU types.  It is in
-# some cases the only manufacturer, in others, it is the most popular.
-	w89k)
-		basic_machine=hppa1.1-winbond
-		;;
-	op50n)
-		basic_machine=hppa1.1-oki
-		;;
-	op60c)
-		basic_machine=hppa1.1-oki
-		;;
-	romp)
-		basic_machine=romp-ibm
-		;;
-	mmix)
-		basic_machine=mmix-knuth
-		;;
-	rs6000)
-		basic_machine=rs6000-ibm
-		;;
-	vax)
-		basic_machine=vax-dec
-		;;
-	pdp10)
-		# there are many clones, so DEC is not a safe bet
-		basic_machine=pdp10-unknown
-		;;
-	pdp11)
-		basic_machine=pdp11-dec
-		;;
-	we32k)
-		basic_machine=we32k-att
-		;;
-	sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
-		basic_machine=sh-unknown
-		;;
-	sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
-		basic_machine=sparc-sun
-		;;
-	cydra)
-		basic_machine=cydra-cydrome
-		;;
-	orion)
-		basic_machine=orion-highlevel
-		;;
-	orion105)
-		basic_machine=clipper-highlevel
-		;;
-	mac | mpw | mac-mpw)
-		basic_machine=m68k-apple
-		;;
-	pmac | pmac-mpw)
-		basic_machine=powerpc-apple
-		;;
-	*-unknown)
-		# Make sure to match an already-canonicalized machine name.
-		;;
-	*)
-		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
-		exit 1
-		;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
-	*-digital*)
-		basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
-		;;
-	*-commodore*)
-		basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
-		;;
-	*)
-		;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if [ x"$os" != x"" ]
-then
-case $os in
-        # First match some system type aliases
-        # that might get confused with valid system types.
-	# -solaris* is a basic system type, with this one exception.
-        -auroraux)
-	        os=-auroraux
-		;;
-	-solaris1 | -solaris1.*)
-		os=`echo $os | sed -e 's|solaris1|sunos4|'`
-		;;
-	-solaris)
-		os=-solaris2
-		;;
-	-svr4*)
-		os=-sysv4
-		;;
-	-unixware*)
-		os=-sysv4.2uw
-		;;
-	-gnu/linux*)
-		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
-		;;
-	# First accept the basic system types.
-	# The portable systems comes first.
-	# Each alternative MUST END IN A *, to match a version number.
-	# -sysv* is not here because it comes later, after sysvr4.
-	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
-	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
-	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
-	      | -sym* | -kopensolaris* \
-	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
-	      | -aos* | -aros* \
-	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
-	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
-	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
-	      | -openbsd* | -solidbsd* \
-	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
-	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
-	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
-	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
-	      | -chorusos* | -chorusrdb* | -cegcc* \
-	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -linux-android* \
-	      | -linux-newlib* | -linux-uclibc* \
-	      | -uxpv* | -beos* | -mpeix* | -udk* \
-	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
-	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
-	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
-	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
-	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
-	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
-	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
-	# Remember, each alternative MUST END IN *, to match a version number.
-		;;
-	-qnx*)
-		case $basic_machine in
-		    x86-* | i*86-*)
-			;;
-		    *)
-			os=-nto$os
-			;;
-		esac
-		;;
-	-nto-qnx*)
-		;;
-	-nto*)
-		os=`echo $os | sed -e 's|nto|nto-qnx|'`
-		;;
-	-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
-	      | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
-	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
-		;;
-	-mac*)
-		os=`echo $os | sed -e 's|mac|macos|'`
-		;;
-	-linux-dietlibc)
-		os=-linux-dietlibc
-		;;
-	-linux*)
-		os=`echo $os | sed -e 's|linux|linux-gnu|'`
-		;;
-	-sunos5*)
-		os=`echo $os | sed -e 's|sunos5|solaris2|'`
-		;;
-	-sunos6*)
-		os=`echo $os | sed -e 's|sunos6|solaris3|'`
-		;;
-	-opened*)
-		os=-openedition
-		;;
-        -os400*)
-		os=-os400
-		;;
-	-wince*)
-		os=-wince
-		;;
-	-osfrose*)
-		os=-osfrose
-		;;
-	-osf*)
-		os=-osf
-		;;
-	-utek*)
-		os=-bsd
-		;;
-	-dynix*)
-		os=-bsd
-		;;
-	-acis*)
-		os=-aos
-		;;
-	-atheos*)
-		os=-atheos
-		;;
-	-syllable*)
-		os=-syllable
-		;;
-	-386bsd)
-		os=-bsd
-		;;
-	-ctix* | -uts*)
-		os=-sysv
-		;;
-	-nova*)
-		os=-rtmk-nova
-		;;
-	-ns2 )
-		os=-nextstep2
-		;;
-	-nsk*)
-		os=-nsk
-		;;
-	# Preserve the version number of sinix5.
-	-sinix5.*)
-		os=`echo $os | sed -e 's|sinix|sysv|'`
-		;;
-	-sinix*)
-		os=-sysv4
-		;;
-        -tpf*)
-		os=-tpf
-		;;
-	-triton*)
-		os=-sysv3
-		;;
-	-oss*)
-		os=-sysv3
-		;;
-	-svr4)
-		os=-sysv4
-		;;
-	-svr3)
-		os=-sysv3
-		;;
-	-sysvr4)
-		os=-sysv4
-		;;
-	# This must come after -sysvr4.
-	-sysv*)
-		;;
-	-ose*)
-		os=-ose
-		;;
-	-es1800*)
-		os=-ose
-		;;
-	-xenix)
-		os=-xenix
-		;;
-	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
-		os=-mint
-		;;
-	-aros*)
-		os=-aros
-		;;
-	-kaos*)
-		os=-kaos
-		;;
-	-zvmoe)
-		os=-zvmoe
-		;;
-	-dicos*)
-		os=-dicos
-		;;
-        -nacl*)
-	        ;;
-	-none)
-		;;
-	*)
-		# Get rid of the `-' at the beginning of $os.
-		os=`echo $os | sed 's/[^-]*-//'`
-		echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
-		exit 1
-		;;
-esac
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system.  Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-case $basic_machine in
-        score-*)
-		os=-elf
-		;;
-        spu-*)
-		os=-elf
-		;;
-	*-acorn)
-		os=-riscix1.2
-		;;
-	arm*-rebel)
-		os=-linux
-		;;
-	arm*-semi)
-		os=-aout
-		;;
-        c4x-* | tic4x-*)
-        	os=-coff
-		;;
-	tic54x-*)
-		os=-coff
-		;;
-	tic55x-*)
-		os=-coff
-		;;
-	tic6x-*)
-		os=-coff
-		;;
-	# This must come before the *-dec entry.
-	pdp10-*)
-		os=-tops20
-		;;
-	pdp11-*)
-		os=-none
-		;;
-	*-dec | vax-*)
-		os=-ultrix4.2
-		;;
-	m68*-apollo)
-		os=-domain
-		;;
-	i386-sun)
-		os=-sunos4.0.2
-		;;
-	m68000-sun)
-		os=-sunos3
-		# This also exists in the configure program, but was not the
-		# default.
-		# os=-sunos4
-		;;
-	m68*-cisco)
-		os=-aout
-		;;
-        mep-*)
-		os=-elf
-		;;
-	mips*-cisco)
-		os=-elf
-		;;
-	mips*-*)
-		os=-elf
-		;;
-	or32-*)
-		os=-coff
-		;;
-	*-tti)	# must be before sparc entry or we get the wrong os.
-		os=-sysv3
-		;;
-	sparc-* | *-sun)
-		os=-sunos4.1.1
-		;;
-	*-be)
-		os=-beos
-		;;
-	*-haiku)
-		os=-haiku
-		;;
-	*-ibm)
-		os=-aix
-		;;
-    	*-knuth)
-		os=-mmixware
-		;;
-	*-wec)
-		os=-proelf
-		;;
-	*-winbond)
-		os=-proelf
-		;;
-	*-oki)
-		os=-proelf
-		;;
-	*-hp)
-		os=-hpux
-		;;
-	*-hitachi)
-		os=-hiux
-		;;
-	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
-		os=-sysv
-		;;
-	*-cbm)
-		os=-amigaos
-		;;
-	*-dg)
-		os=-dgux
-		;;
-	*-dolphin)
-		os=-sysv3
-		;;
-	m68k-ccur)
-		os=-rtu
-		;;
-	m88k-omron*)
-		os=-luna
-		;;
-	*-next )
-		os=-nextstep
-		;;
-	*-sequent)
-		os=-ptx
-		;;
-	*-crds)
-		os=-unos
-		;;
-	*-ns)
-		os=-genix
-		;;
-	i370-*)
-		os=-mvs
-		;;
-	*-next)
-		os=-nextstep3
-		;;
-	*-gould)
-		os=-sysv
-		;;
-	*-highlevel)
-		os=-bsd
-		;;
-	*-encore)
-		os=-bsd
-		;;
-	*-sgi)
-		os=-irix
-		;;
-	*-siemens)
-		os=-sysv4
-		;;
-	*-masscomp)
-		os=-rtu
-		;;
-	f30[01]-fujitsu | f700-fujitsu)
-		os=-uxpv
-		;;
-	*-rom68k)
-		os=-coff
-		;;
-	*-*bug)
-		os=-coff
-		;;
-	*-apple)
-		os=-macos
-		;;
-	*-atari*)
-		os=-mint
-		;;
-	*)
-		os=-none
-		;;
-esac
-fi
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer.  We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
-	*-unknown)
-		case $os in
-			-riscix*)
-				vendor=acorn
-				;;
-			-sunos*)
-				vendor=sun
-				;;
-			-cnk*|-aix*)
-				vendor=ibm
-				;;
-			-beos*)
-				vendor=be
-				;;
-			-hpux*)
-				vendor=hp
-				;;
-			-mpeix*)
-				vendor=hp
-				;;
-			-hiux*)
-				vendor=hitachi
-				;;
-			-unos*)
-				vendor=crds
-				;;
-			-dgux*)
-				vendor=dg
-				;;
-			-luna*)
-				vendor=omron
-				;;
-			-genix*)
-				vendor=ns
-				;;
-			-mvs* | -opened*)
-				vendor=ibm
-				;;
-			-os400*)
-				vendor=ibm
-				;;
-			-ptx*)
-				vendor=sequent
-				;;
-			-tpf*)
-				vendor=ibm
-				;;
-			-vxsim* | -vxworks* | -windiss*)
-				vendor=wrs
-				;;
-			-aux*)
-				vendor=apple
-				;;
-			-hms*)
-				vendor=hitachi
-				;;
-			-mpw* | -macos*)
-				vendor=apple
-				;;
-			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
-				vendor=atari
-				;;
-			-vos*)
-				vendor=stratus
-				;;
-		esac
-		basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
-		;;
-esac
-
-echo $basic_machine$os
-exit
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
+/usr/share/automake-1.11/config.sub
+\ No newline at end of file
diff --git a/volk/config/Makefile.am b/volk/config/Makefile.am
index 0e556c6e2..27e3f1296 100644
--- a/volk/config/Makefile.am
+++ b/volk/config/Makefile.am
@@ -27,6 +27,8 @@ m4datadir = $(datadir)/aclocal
 # List your m4 macros here
 m4macros = \
 	acx_pthread.m4 \
+	ax_boost_base.m4 \
+	ax_boost_unit_test_framework.m4 \
 	bnv_have_qt.m4 \
 	cppunit.m4 \
 	gr_lib64.m4 \
@@ -45,6 +47,7 @@ m4macros = \
 	mkstemp.m4 \
 	onceonly.m4 \
 	pkg.m4 \
+	orc.m4 \
 	gcc_version_workaround.m4
 
 
diff --git a/volk/config/ax_boost_base.m4 b/volk/config/ax_boost_base.m4
new file mode 100644
index 000000000..e9790227e
--- /dev/null
+++ b/volk/config/ax_boost_base.m4
@@ -0,0 +1,334 @@
+# ===========================================================================
+#             http://autoconf-archive.cryp.to/ax_boost_base.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_BOOST_BASE([MINIMUM-VERSION])
+#
+# DESCRIPTION
+#
+#   Test for the Boost C++ libraries of a particular version (or newer)
+#
+#   If no path to the installed boost library is given the macro searches
+#   under /usr, /usr/local, /opt and /opt/local and evaluates the
+#   $BOOST_ROOT environment variable. Further documentation is available at
+#   <http://randspringer.de/boost/index.html>.
+#
+#   This macro calls:
+#
+#     AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS)
+#
+#   And sets:
+#
+#     HAVE_BOOST
+#
+# LAST MODIFICATION
+#
+#   2008-04-12
+#
+# COPYLEFT
+#
+#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
+#   Copyright (c) 2008 Free Software Foundation, Inc.
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.
+
+AC_DEFUN([AX_BOOST_BASE],
+[
+AC_REQUIRE([GR_LIB64])
+AC_ARG_WITH([boost],
+    AS_HELP_STRING([--with-boost@<:@=DIR@:>@],
+		   [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]),
+    [
+    if test "$withval" = "no"; then
+        want_boost="no"
+    elif test "$withval" = "yes"; then
+        want_boost="yes"
+        ac_boost_path=""
+    else
+        want_boost="yes"
+        ac_boost_path="$withval"
+    fi
+    ],
+    [want_boost="yes"])
+
+
+AC_ARG_WITH([boost-libdir],
+        AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
+		       [Force given directory for boost libraries. Note that this
+		        will overwrite library path detection, so use this parameter
+		        only if default library detection fails and you know exactly
+                        where your boost libraries are located.]),
+        [
+        if test -d "$withval"
+        then
+                ac_boost_lib_path="$withval"
+        else
+                AC_MSG_ERROR([--with-boost-libdir expected directory name])
+        fi
+        ],
+        [ac_boost_lib_path=""]
+)
+
+if test "x$want_boost" = "xyes"; then
+    boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
+    boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
+    boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
+    boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
+    boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
+    if test "x$boost_lib_version_req_sub_minor" = "x" ; then
+        boost_lib_version_req_sub_minor="0"
+        fi
+    WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+  $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
+    AC_MSG_CHECKING(for boost >= $boost_lib_version_req)
+    succeeded=no
+
+    dnl first we check the system location for boost libraries
+    dnl this location is chosen if boost libraries are installed with the --layout=system option
+    dnl or if you install boost with RPM
+    if test "$ac_boost_path" != ""; then
+	dnl Look first where we think they ought to be, accounting for a possible "64" suffix on lib.
+	dnl If that directory doesn't exist, fall back to the default behavior
+	if test -d "$ac_boost_path/lib${gr_libdir_suffix}"; then
+            BOOST_LDFLAGS="-L$ac_boost_path/lib${gr_libdir_suffix}"
+        else
+            BOOST_LDFLAGS="-L$ac_boost_path/lib"
+        fi
+        BOOST_CPPFLAGS="-I$ac_boost_path/include"
+    else
+        for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
+            if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
+		dnl Look first where we think they ought to be, accounting for a possible "64" suffix on lib.
+		dnl If that directory doesn't exist, fall back to the default behavior
+		if test -d "$ac_boost_path_tmp/lib${gr_libdir_suffix}"; then
+                    BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib${gr_libdir_suffix}"
+		else
+	            BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib"
+		fi
+                BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
+                break;
+            fi
+        done
+    fi
+
+    dnl overwrite ld flags if we have required special directory with
+    dnl --with-boost-libdir parameter
+    if test "$ac_boost_lib_path" != ""; then
+       BOOST_LDFLAGS="-L$ac_boost_lib_path"
+    fi
+
+    CPPFLAGS_SAVED="$CPPFLAGS"
+    CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+    export CPPFLAGS
+
+    LDFLAGS_SAVED="$LDFLAGS"
+    LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+    export LDFLAGS
+
+    AC_LANG_PUSH(C++)
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+    @%:@include <boost/version.hpp>
+    ]], [[
+    #if BOOST_VERSION >= $WANT_BOOST_VERSION
+    // Everything is okay
+    #else
+    #  error Boost version is too old
+    #endif
+    ]])],[AC_MSG_RESULT(yes)
+	  succeeded=yes
+	  found_system=yes
+          ],
+         [])
+    AC_LANG_POP([C++])
+    CPPFLAGS="$CPPFLAGS_SAVED"
+    LDFLAGS="$LDFLAGS_SAVED"
+
+
+    dnl if we found no boost with system layout we search for boost libraries
+    dnl built and installed without the --layout=system option
+    if test "$succeeded" != "yes"; then
+        _version=0
+
+        if test "$ac_boost_path" != ""; then
+	    path_list="$ac_boost_path"
+	else
+	    path_list="/usr /usr/local /opt /opt/local"
+	fi
+        for ac_boost_path in $path_list ; do
+	    if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
+            	for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
+		    _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's,/include/boost-,,; s,_,.,'`
+                    V_CHECK=`expr $_version_tmp \> $_version`
+                    if test "$V_CHECK" = "1" ; then
+                        _version=$_version_tmp
+                        best_path=$ac_boost_path
+		    fi
+                done
+            fi
+	done
+
+        VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
+        BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
+
+        if test "$ac_boost_lib_path" = "";  then
+	    dnl Look first where we think they ought to be, accounting for a possible "64" suffix on lib.
+	    dnl If that directory doesn't exist, fall back to the default behavior
+	    if test -d "$best_path/lib${gr_libdir_suffix}"; then
+                BOOST_LDFLAGS="-L$best_path/lib${gr_libdir_suffix}"
+	    else
+                BOOST_LDFLAGS="-L$best_path/lib"
+	    fi
+        fi
+
+        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+        export CPPFLAGS
+        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+        export LDFLAGS
+
+        AC_LANG_PUSH(C++)
+            AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+        @%:@include <boost/version.hpp>
+        ]], [[
+        #if BOOST_VERSION >= $WANT_BOOST_VERSION
+        // Everything is okay
+        #else
+        #  error Boost version is too old
+        #endif
+        ]])],[AC_MSG_RESULT(yes)
+	      succeeded=yes
+              found_system=yes
+              ],
+	     [])
+        AC_LANG_POP([C++])
+        CPPFLAGS="$CPPFLAGS_SAVED"
+        LDFLAGS="$LDFLAGS_SAVED"
+    fi
+
+    if test "$succeeded" != "yes" ; then
+	AC_MSG_RESULT([no])
+        if test "$_version" = "0" ; then
+            AC_MSG_ERROR([[we could not detect the boost libraries (version $boost_lib_version_req_shorten or higher).
+If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>.]])
+        else
+            AC_MSG_ERROR([your boost libraries seem too old (version $_version).])
+        fi
+    else
+        AC_SUBST(BOOST_CPPFLAGS)
+        AC_SUBST(BOOST_LDFLAGS)
+        AC_DEFINE(HAVE_BOOST,1,[Define if the Boost headers are available])
+    fi
+fi
+])
+
+dnl
+dnl Macros used by the boost items that need libraries.
+dnl
+
+dnl $1 is unit name.  E.g., boost_thread.  Derives HAVE_<UNIT> and <UNIT>_LIB from the upper-cased unit name and passes all three to _AX_BOOST_CHECK_LIB_
+AC_DEFUN([_AX_BOOST_CHECK_LIB],[
+    _AX_BOOST_CHECK_LIB_($1,HAVE_[]m4_toupper($1),m4_toupper($1)_LIB)
+])
+
+dnl Defines the second argument, then looks for a library that links and AC_SUBSTs it under the third argument's name; configure aborts if none links.  $1 is unit name.  E.g., boost_thread
+dnl $2 is AC_DEFINE name.  E.g., HAVE_BOOST_THREAD
+dnl $3 is lib var name.    E.g., BOOST_THREAD_LIB
+AC_DEFUN([_AX_BOOST_CHECK_LIB_],[
+    AC_LANG_PUSH([C++])
+    AC_DEFINE($2,1,[Define if the $1 library is available])
+    BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
+
+    dnl See if we can find a usable library; link_ok records whether any AC_CHECK_LIB probe succeeded
+    link_ok="no"
+    if test "$ax_boost_user_lib" != ""; then
+        dnl use what the user supplied 
+        for ax_lib in $ax_boost_user_lib $1-${ax_boost_user_lib}; do
+	    AC_CHECK_LIB($ax_lib, exit,
+                         [$3="-l$ax_lib"; AC_SUBST($3) link_ok="yes"; break])
+        done
+    else
+	dnl Look in BOOSTLIBDIR for possible candidates
+	head=$BOOSTLIBDIR/lib[]$1
+	for f in ${head}*.so* ${head}*.a* ${head}*.dll* ${head}*.dylib; do
+	    dnl A pattern that matched no file is left unexpanded and still contains '*'; skip it below.
+	    case $f in
+	      *\**) continue;;
+	    esac
+	    f=`echo $f | sed -e 's,.*/,,' -e 's,^lib,,'`
+	    dnl $f is now the bare file name with the directory and the leading "lib" removed.
+	    f=`echo $f | sed -e 's,\($1.*\)\.so.*$,\1,' -e 's,\($1.*\)\.a.*$,\1,' -e 's,\($1.*\)\.dll.*$,\1,' -e 's,\($1.*\)\.dylib.*$,\1,'`
+	    dnl $f is now the name handed to -l: the .so/.a/.dll/.dylib suffix and anything after it are gone.
+
+	    ax_lib=$f
+            AC_CHECK_LIB($ax_lib, exit,
+                        [$3="-l$ax_lib"; AC_SUBST($3) link_ok="yes"; break])
+	done
+    fi		    
+		    		    
+    if test "$link_ok" != "yes"; then
+    	AC_MSG_ERROR([Could not link against lib[$1]!])
+    fi
+    AC_LANG_POP([C++])
+])
+
+
+dnl $1 is unit name.  E.g., boost_thread.  Passes it, plus a copy with underscores turned into hyphens, to _AX_BOOST_WITH_
+AC_DEFUN([_AX_BOOST_WITH],[
+    _AX_BOOST_WITH_($1,m4_bpatsubst($1,_,-))
+])
+
+dnl Declares the --with-<hyphenated unit> option; sets want_boost and ax_boost_user_lib.  $1 is unit name.  E.g., boost_thread
+dnl $2 is hyphenated unit name.  E.g., boost-thread
+AC_DEFUN([_AX_BOOST_WITH_],[
+    AC_ARG_WITH([$2],
+    		AS_HELP_STRING([--with-$2@<:@=special-lib@:>@],
+		               [Use the m4_substr($1,6) library from boost.  It is possible to specify a certain
+		                library to the linker.  E.g., --with-$2=$1-gcc41-mt-1_35]),
+        	[
+	        if test "$withval" = "no"; then
+	            want_boost="no"
+	        elif test "$withval" = "yes"; then
+	            want_boost="yes"
+	            ax_boost_user_lib=""
+	        else
+	            want_boost="yes"
+	            ax_boost_user_lib="$withval"
+	        fi
+	        ],
+	        [want_boost="yes"])
+])
+
+dnl Checks that the unit's headers compile and, if so, that a matching library links.  $1 is unit name.  E.g., boost_thread
+dnl $2 is AC_LANG_PROGRAM argument 1
+dnl $3 is AC_LANG_PROGRAM argument 2
+dnl $4 is cv variable name.  E.g., ax_cv_boost_thread
+AC_DEFUN([_AX_BOOST_CHECK_],[
+    _AX_BOOST_WITH($1)
+    if test "$want_boost" = "yes"; then
+        AC_REQUIRE([AC_PROG_CC])
+        AC_REQUIRE([AC_PROG_CXX])
+        CPPFLAGS_SAVED="$CPPFLAGS"
+        CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+        LDFLAGS_SAVED="$LDFLAGS"
+        LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
+        AC_CACHE_CHECK([whether the boost::m4_substr([$1],6) includes are available], [$4],
+		       [AC_LANG_PUSH([C++])
+                        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$2],[$3])],[$4]=yes,[$4]=no)
+                        AC_LANG_POP([C++])
+                       ])
+	if test "$[$4]" = "yes"; then
+	    _AX_BOOST_CHECK_LIB([$1])
+	fi
+        CPPFLAGS="$CPPFLAGS_SAVED"
+        LDFLAGS="$LDFLAGS_SAVED"
+    fi
+])
+
+dnl Forwards to _AX_BOOST_CHECK_ with the cache variable named ax_cv_<unit>.  $1 is unit name.  E.g., boost_thread
+dnl $2 is AC_LANG_PROGRAM argument 1
+dnl $3 is AC_LANG_PROGRAM argument 2
+AC_DEFUN([_AX_BOOST_CHECK],[
+    _AX_BOOST_CHECK_($1,$2,$3,ax_cv_$1)
+])
diff --git a/volk/config/ax_boost_unit_test_framework.m4 b/volk/config/ax_boost_unit_test_framework.m4
new file mode 100644
index 000000000..73affccfd
--- /dev/null
+++ b/volk/config/ax_boost_unit_test_framework.m4
@@ -0,0 +1,36 @@
+#
+# SYNOPSIS
+#
+#   AX_BOOST_UNIT_TEST_FRAMEWORK
+#
+# DESCRIPTION
+#
+#   Test for Unit_Test_Framework library from the Boost C++ libraries. The
+#   macro requires a preceding call to AX_BOOST_BASE.
+#
+#   This macro calls:
+#
+#     AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB)
+#
+#   And sets:
+#
+#     HAVE_BOOST_UNIT_TEST_FRAMEWORK
+#
+# COPYLEFT
+#
+#   Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
+#   Copyright (c) 2008 Free Software Foundation, Inc.
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.
+
+AC_DEFUN([AX_BOOST_UNIT_TEST_FRAMEWORK],[
+    dnl Pull in AX_BOOST_BASE first, then probe with a minimal test-suite program.
+    AC_REQUIRE([AX_BOOST_BASE])
+    _AX_BOOST_CHECK([boost_unit_test_framework],
+        [@%:@include <boost/test/unit_test.hpp>],
+        [using boost::unit_test::test_suite;
+         test_suite* test= BOOST_TEST_SUITE( "Unit test example 1" );
+         return 0;])
+])
diff --git a/volk/config/lv_configure.m4 b/volk/config/lv_configure.m4
index c7a5fe960..f98b2dc5b 100644
--- a/volk/config/lv_configure.m4
+++ b/volk/config/lv_configure.m4
@@ -100,6 +100,9 @@ dnl  AM_CONDITIONAL([USE_PYTHON], [test "$with_python" = yes])
   GR_PWIN32
   GR_LIBGNURADIO_CORE_EXTRA_LDFLAGS
   
+  dnl Check for liborc
+  ORC_CHECK
+  
   LDFLAGS="$LDFLAGS $LIBGNURADIO_CORE_EXTRA_LDFLAGS"
 
   AC_CHECK_PROG([XMLTO],[xmlto],[yes],[])
diff --git a/volk/config/orc.m4 b/volk/config/orc.m4
new file mode 100644
index 000000000..df0f3d6f3
--- /dev/null
+++ b/volk/config/orc.m4
@@ -0,0 +1,59 @@
+dnl pkg-config-based checks for Orc
+dnl
+dnl ORC_CHECK([REQUIRED_VERSION])
+dnl   Looks for orc-0.4 >= REQUIRED_VERSION (default 0.4.10) unless enable_orc is "no".
+dnl   Sets LV_HAVE_ORC / LV_HAVE_ORCC and substitutes ORCC, ORCC_FLAGS, ORC_LDFLAGS, ORC_CFLAGS.
+AC_DEFUN([ORC_CHECK],
+[
+  ORC_REQ=ifelse([$1], , "0.4.10", [$1])
+  dnl enable_orc is only read here; no --enable-orc option is declared in this macro.
+  if test "x$enable_orc" != "xno" ; then
+    PKG_CHECK_MODULES(ORC, orc-0.4 >= $ORC_REQ, [
+      AC_DEFINE(LV_HAVE_ORC, 1, [Use Orc])
+      if test "x$ORCC" = "x" ; then
+        ORCC=`$PKG_CONFIG --variable=orcc orc-0.4`
+      fi
+      AC_SUBST(ORCC)
+      ORCC_FLAGS="--compat $ORC_REQ"
+      ORC_LDFLAGS=`$PKG_CONFIG --libs orc-0.4`
+      ORC_CFLAGS=`$PKG_CONFIG --cflags orc-0.4`
+      AC_SUBST(ORCC_FLAGS)
+      AC_SUBST(ORC_LDFLAGS)
+      AC_SUBST(ORC_CFLAGS)
+      LV_HAVE_ORC=yes
+      LV_HAVE_ORCC=yes
+      if test "x$cross_compiling" = "xyes" ; then
+        LV_HAVE_ORCC=no
+      fi
+    ], [
+      if test "x$enable_orc" = "xyes" ; then
+        AC_MSG_ERROR([--enable-orc specified, but Orc >= $ORC_REQ not found])
+      fi
+      AC_DEFINE(DISABLE_ORC, 1, [Disable Orc])
+      LV_HAVE_ORC=no
+      LV_HAVE_ORCC=no
+    ])
+  else
+    AC_DEFINE(DISABLE_ORC, 1, [Disable Orc])
+    LV_HAVE_ORC=no
+    LV_HAVE_ORCC=no
+  fi
+  AM_CONDITIONAL(LV_HAVE_ORC, [test "x$LV_HAVE_ORC" = "xyes"])
+  AM_CONDITIONAL(LV_HAVE_ORCC, [test "x$LV_HAVE_ORCC" = "xyes"])
+])
+
+AC_DEFUN([ORC_OUTPUT],
+[
+  dnl Print a short summary of the Orc decision recorded by ORC_CHECK.
+  dnl Reads LV_HAVE_ORC, enable_orc and ORC_REQ; assigns nothing.
+  if test "$LV_HAVE_ORC" = yes ; then
+    printf "configure: *** Orc acceleration enabled.\n"
+  elif test "x$enable_orc" = "xno" ; then
+    printf "configure: *** Orc acceleration disabled by --disable-orc.\n"
+  else
+    printf "configure: *** Orc acceleration disabled.  Requires Orc >= $ORC_REQ, which was\n"
+    printf "               not found.\n"
+  fi
+  printf "\n"
+])
+
diff --git a/volk/configure.ac b/volk/configure.ac
index 7cbcbad53..c493adad6 100644
--- a/volk/configure.ac
+++ b/volk/configure.ac
@@ -18,7 +18,6 @@ dnl
 AC_INIT
 AC_PREREQ(2.57)
 AC_CONFIG_AUX_DIR([.])	
-AC_CONFIG_SRCDIR([lib/test_all.cc])
 AM_CONFIG_HEADER(config.h)
 AM_INIT_AUTOMAKE(volk,0.1)
 
@@ -44,7 +43,7 @@ dnl If you need additional boost libraries, you'll need to
 dnl uncomment AX_BOOST_BASE, plus some of the following:
 dnl
 dnl calls AC_SUBST(BOOST_CPPFLAGS), AC_SUBST(BOOST_LDFLAGS) and defines HAVE_BOOST
-dnl AX_BOOST_BASE([1.35])
+AX_BOOST_BASE([1.35])
 dnl
 dnl All the rest of these call AC_SUBST(BOOST_<foo>_LIB) and define HAVE_BOOST_<foo>
 dnl
@@ -57,7 +56,7 @@ dnl AX_BOOST_SERIALIZATION
 dnl AX_BOOST_SIGNALS
 dnl AX_BOOST_SYSTEM
 dnl AX_BOOST_TEST_EXEC_MONITOR
-dnl AX_BOOST_UNIT_TEST_FRAMEWORK
+AX_BOOST_UNIT_TEST_FRAMEWORK
 dnl AX_BOOST_WSERIALIZATION
 
 AC_CONFIG_HEADERS([volk_config.h])
@@ -69,6 +68,7 @@ AC_CONFIG_FILES([\
 	  include/Makefile \
 	  include/volk/Makefile \
 	  lib/Makefile \
+	  orc/Makefile \
 	  volk.pc \
 	])
 
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index 00289be1e..eb97775b0 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -41,94 +41,93 @@ volkinclude_HEADERS = \
 	volk.h \
 	volk_cpu.h \
 	volk_environment_init.h \
-	volk_16s_add_quad_aligned16.h \
-	volk_16s_branch_4_state_8_aligned16.h \
-	volk_16sc_deinterleave_16s_aligned16.h \
-	volk_16sc_deinterleave_32f_aligned16.h \
-	volk_16sc_deinterleave_real_16s_aligned16.h \
-	volk_16sc_deinterleave_real_32f_aligned16.h \
-	volk_16sc_deinterleave_real_8s_aligned16.h \
-	volk_16sc_magnitude_16s_aligned16.h \
-	volk_16sc_magnitude_32f_aligned16.h \
-	volk_16s_convert_32f_aligned16.h \
-	volk_16s_convert_32f_unaligned16.h \
-	volk_16s_convert_8s_aligned16.h \
-	volk_16s_convert_8s_unaligned16.h \
-	volk_16s_max_star_aligned16.h \
-	volk_16s_max_star_horizontal_aligned16.h \
-	volk_16s_permute_and_scalar_add_aligned16.h \
-	volk_16s_quad_max_star_aligned16.h \
-	volk_16u_byteswap_aligned16.h \
-	volk_32f_accumulator_aligned16.h \
-	volk_32f_add_aligned16.h \
-	volk_32fc_32f_multiply_aligned16.h \
-	volk_32fc_32f_power_32fc_aligned16.h \
-	volk_32f_calc_spectral_noise_floor_aligned16.h \
-	volk_32fc_atan2_32f_aligned16.h \
-	volk_32fc_conjugate_dot_prod_aligned16.h \
-	volk_32fc_conjugate_dot_prod_unaligned.h \
-	volk_32fc_deinterleave_32f_aligned16.h \
-	volk_32fc_deinterleave_64f_aligned16.h \
-	volk_32fc_deinterleave_real_16s_aligned16.h \
-	volk_32fc_deinterleave_real_32f_aligned16.h \
-	volk_32fc_deinterleave_real_64f_aligned16.h \
-	volk_32fc_dot_prod_aligned16.h \
-	volk_32fc_index_max_aligned16.h \
-	volk_32fc_magnitude_16s_aligned16.h \
-	volk_32fc_magnitude_32f_aligned16.h \
-	volk_32fc_multiply_aligned16.h \
-	volk_32f_convert_16s_aligned16.h \
-	volk_32f_convert_16s_unaligned16.h \
-	volk_32f_convert_32s_aligned16.h \
-	volk_32f_convert_32s_unaligned16.h \
-	volk_32f_convert_64f_aligned16.h \
-	volk_32f_convert_64f_unaligned16.h \
-	volk_32f_convert_8s_aligned16.h \
-	volk_32f_convert_8s_unaligned16.h \
-	volk_32fc_power_spectral_density_32f_aligned16.h \
-	volk_32fc_power_spectrum_32f_aligned16.h \
-	volk_32fc_square_dist_aligned16.h \
-	volk_32fc_square_dist_scalar_mult_aligned16.h \
-	volk_32f_divide_aligned16.h \
-	volk_32f_dot_prod_aligned16.h \
-	volk_32f_dot_prod_unaligned16.h \
-	volk_32f_fm_detect_aligned16.h \
-	volk_32f_index_max_aligned16.h \
-	volk_32f_interleave_16sc_aligned16.h \
-	volk_32f_interleave_32fc_aligned16.h \
-	volk_32f_max_aligned16.h \
-	volk_32f_min_aligned16.h \
-	volk_32f_multiply_aligned16.h \
-	volk_32f_normalize_aligned16.h \
-	volk_32f_power_aligned16.h \
-	volk_32f_sqrt_aligned16.h \
-	volk_32f_stddev_aligned16.h \
-	volk_32f_stddev_and_mean_aligned16.h \
-	volk_32f_subtract_aligned16.h \
-	volk_32f_sum_of_poly_aligned16.h \
-	volk_32s_and_aligned16.h \
-	volk_32s_convert_32f_aligned16.h \
-	volk_32s_convert_32f_unaligned16.h \
-	volk_32s_or_aligned16.h \
-	volk_32u_byteswap_aligned16.h \
-	volk_32u_popcnt_aligned16.h \
-	volk_64f_convert_32f_aligned16.h \
-	volk_64f_convert_32f_unaligned16.h \
-	volk_64f_max_aligned16.h \
-	volk_64f_min_aligned16.h \
-	volk_64u_byteswap_aligned16.h \
-	volk_64u_popcnt_aligned16.h \
-	volk_8sc_deinterleave_16s_aligned16.h \
-	volk_8sc_deinterleave_32f_aligned16.h \
-	volk_8sc_deinterleave_real_16s_aligned16.h \
-	volk_8sc_deinterleave_real_32f_aligned16.h \
-	volk_8sc_deinterleave_real_8s_aligned16.h \
-	volk_8sc_multiply_conjugate_16sc_aligned16.h \
-	volk_8sc_multiply_conjugate_32fc_aligned16.h \
-	volk_8s_convert_16s_aligned16.h \
-	volk_8s_convert_16s_unaligned16.h \
-	volk_8s_convert_32f_aligned16.h \
-	volk_8s_convert_32f_unaligned16.h 
+	volk_16i_x5_add_quad_16i_x4_a16.h \
+	volk_16i_branch_4_state_8_a16.h \
+	volk_16ic_deinterleave_16i_x2_a16.h \
+	volk_16ic_s32f_deinterleave_32f_x2_a16.h \
+	volk_16ic_deinterleave_real_16i_a16.h \
+	volk_16ic_s32f_deinterleave_real_32f_a16.h \
+	volk_16ic_deinterleave_real_8i_a16.h \
+	volk_16ic_magnitude_16i_a16.h \
+	volk_16ic_s32f_magnitude_32f_a16.h \
+	volk_16i_s32f_convert_32f_a16.h \
+	volk_16i_s32f_convert_32f_u.h \
+	volk_16i_convert_8i_a16.h \
+	volk_16i_convert_8i_u.h \
+	volk_16i_max_star_16i_a16.h \
+	volk_16i_max_star_horizontal_16i_a16.h \
+	volk_16i_permute_and_scalar_add_a16.h \
+	volk_16i_x4_quad_max_star_16i_a16.h \
+	volk_16u_byteswap_a16.h \
+	volk_32f_accumulator_s32f_a16.h \
+	volk_32f_x2_add_32f_a16.h \
+	volk_32fc_32f_multiply_32fc_a16.h \
+	volk_32fc_s32f_power_32fc_a16.h \
+	volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h \
+	volk_32fc_s32f_atan2_32f_a16.h \
+	volk_32fc_x2_conjugate_dot_prod_32fc_a16.h \
+	volk_32fc_deinterleave_32f_x2_a16.h \
+	volk_32fc_deinterleave_64f_x2_a16.h \
+	volk_32fc_s32f_deinterleave_real_16i_a16.h \
+	volk_32fc_deinterleave_real_32f_a16.h \
+	volk_32fc_deinterleave_real_64f_a16.h \
+	volk_32fc_x2_dot_prod_32fc_a16.h \
+	volk_32fc_index_max_16u_a16.h \
+	volk_32fc_s32f_magnitude_16i_a16.h \
+	volk_32fc_magnitude_32f_a16.h \
+	volk_32fc_x2_multiply_32fc_a16.h \
+	volk_32f_s32f_convert_16i_a16.h \
+	volk_32f_s32f_convert_16i_u.h \
+	volk_32f_s32f_convert_32i_a16.h \
+	volk_32f_s32f_convert_32i_u.h \
+	volk_32f_convert_64f_a16.h \
+	volk_32f_convert_64f_u.h \
+	volk_32f_s32f_convert_8i_a16.h \
+	volk_32f_s32f_convert_8i_u.h \
+	volk_32fc_s32f_x2_power_spectral_density_32f_a16.h \
+	volk_32fc_s32f_power_spectrum_32f_a16.h \
+	volk_32fc_x2_square_dist_32f_a16.h \
+	volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h \
+	volk_32f_x2_divide_32f_a16.h \
+	volk_32f_x2_dot_prod_32f_a16.h \
+	volk_32f_x2_dot_prod_32f_u.h \
+	volk_32f_s32f_32f_fm_detect_32f_a16.h \
+	volk_32f_index_max_16u_a16.h \
+	volk_32f_x2_s32f_interleave_16ic_a16.h \
+	volk_32f_x2_interleave_32fc_a16.h \
+	volk_32f_x2_max_32f_a16.h \
+	volk_32f_x2_min_32f_a16.h \
+	volk_32f_x2_multiply_32f_a16.h \
+	volk_32f_s32f_normalize_a16.h \
+	volk_32f_s32f_power_32f_a16.h \
+	volk_32f_sqrt_32f_a16.h \
+	volk_32f_s32f_stddev_32f_a16.h \
+	volk_32f_stddev_and_mean_32f_x2_a16.h \
+	volk_32f_x2_subtract_32f_a16.h \
+	volk_32f_x3_sum_of_poly_32f_a16.h \
+	volk_32i_x2_and_32i_a16.h \
+	volk_32i_s32f_convert_32f_a16.h \
+	volk_32i_s32f_convert_32f_u.h \
+	volk_32i_x2_or_32i_a16.h \
+	volk_32u_byteswap_a16.h \
+	volk_32u_popcnt_a16.h \
+	volk_64f_convert_32f_a16.h \
+	volk_64f_convert_32f_u.h \
+	volk_64f_x2_max_64f_a16.h \
+	volk_64f_x2_min_64f_a16.h \
+	volk_64u_byteswap_a16.h \
+	volk_64u_popcnt_a16.h \
+	volk_8ic_deinterleave_16i_x2_a16.h \
+	volk_8ic_s32f_deinterleave_32f_x2_a16.h \
+	volk_8ic_deinterleave_real_16i_a16.h \
+	volk_8ic_s32f_deinterleave_real_32f_a16.h \
+	volk_8ic_deinterleave_real_8i_a16.h \
+	volk_8ic_x2_multiply_conjugate_16ic_a16.h \
+	volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h \
+	volk_8i_convert_16i_a16.h \
+	volk_8i_convert_16i_u.h \
+	volk_8i_s32f_convert_32f_a16.h \
+	volk_8i_s32f_convert_32f_u.h 
 
 VOLK_MKTABLES_SOURCES = \
 	$(platform_CODE) \
diff --git a/volk/include/volk/archs.xml b/volk/include/volk/archs.xml
index b7c98500f..a19a5add9 100644
--- a/volk/include/volk/archs.xml
+++ b/volk/include/volk/archs.xml
@@ -5,6 +5,12 @@
   <flag>none</flag>
 </arch>
 
+<arch name="orc" type="all">
+  <flag>lorc-0.4</flag>
+  <overrule>LV_HAVE_ORC</overrule>
+  <overrule_val>no</overrule_val>
+</arch>
+
 <arch name="altivec" type="powerpc">
   <flag>maltivec</flag>
 </arch>
diff --git a/volk/include/volk/make_c.py b/volk/include/volk/make_c.py
index f2432d7a4..6e75067d0 100644
--- a/volk/include/volk/make_c.py
+++ b/volk/include/volk/make_c.py
@@ -25,7 +25,6 @@ def make_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) :
     tempstring = tempstring + "    return 0;\n"
     tempstring = tempstring + "}\n"
 
-
     for i in range(len(funclist)): 
         tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n";
         
diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py
index 275d3869f..c74b0464d 100644
--- a/volk/include/volk/make_set_simd.py
+++ b/volk/include/volk/make_set_simd.py
@@ -95,7 +95,7 @@ def make_set_simd(dom) :
         arch = str(domarch.attributes["name"].value);    
         tempstring = tempstring + "  AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n";
     tempstring = tempstring + "  ADDONS=\"\"\n";
-    tempstring = tempstring + "  BUILT_ARCHS=\"generic\"\n";
+    tempstring = tempstring + "  BUILT_ARCHS=\"\"\n";
     tempstring = tempstring + "  _MAKE_FAKE_PROCCPU\n";
     tempstring = tempstring + "  OVERRULE_FLAG=\"no\"\n";
     tempstring = tempstring + "  if test -z \"$cf_with_lv_arch\"; then\n";
@@ -165,8 +165,22 @@ def make_set_simd(dom) :
             tempstring = tempstring + "    indCXX=no\n"
             tempstring = tempstring + "    indLV_ARCH=no\n"
         elif atype == "all":
+            tempstring = tempstring + "    for i in $cf_with_lv_arch\n"
+            tempstring = tempstring + "    do\n"
+            tempstring = tempstring + "      if test \"X$i\" = X" + arch + "; then\n";
+            tempstring = tempstring + "        indLV_ARCH=yes\n"
+            tempstring = tempstring + "      fi\n"
+            tempstring = tempstring + "    done\n"
+            tempstring = tempstring + "    if  test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+            tempstring = tempstring + "      indLV_ARCH=no\n"
+            tempstring = tempstring + "    fi\n"
+            tempstring = tempstring + "    if test \"$indLV_ARCH\" == \"yes\"; then\n"        
             tempstring = tempstring + "      AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n";
             tempstring = tempstring + "      LV_HAVE_" + arch.swapcase() + "=yes\n";
+            tempstring = tempstring + "      BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
+            tempstring = tempstring + "    fi\n"
+            tempstring = tempstring + "    indLV_ARCH=no\n"
+            
     tempstring = tempstring + "  ;;\n"
         
     tempstring = tempstring + "  (powerpc)\n"
@@ -210,14 +224,49 @@ def make_set_simd(dom) :
             tempstring = tempstring + "    indCXX=no\n"
             tempstring = tempstring + "    indLV_ARCH=no\n"
         elif atype == "all":
+            tempstring = tempstring + "    for i in $cf_with_lv_arch\n"
+            tempstring = tempstring + "    do\n"
+            tempstring = tempstring + "      if test \"X$i\" = X" + arch + "; then\n";
+            tempstring = tempstring + "        indLV_ARCH=yes\n"
+            tempstring = tempstring + "      fi\n"
+            tempstring = tempstring + "    done\n"
+            tempstring = tempstring + "    if  test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+            tempstring = tempstring + "      indLV_ARCH=no\n"
+            tempstring = tempstring + "    fi\n"
+            tempstring = tempstring + "    if test \"$indLV_ARCH\" == \"yes\"; then\n"
+            tempstring = tempstring + "      AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n";
+            tempstring = tempstring + "      LV_HAVE_" + arch.swapcase() + "=yes\n";
+            tempstring = tempstring + "      BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
+            tempstring = tempstring + "    fi\n"
+            tempstring = tempstring + "    indLV_ARCH=no\n"
+    tempstring = tempstring + "  ;;\n"
+    tempstring = tempstring + "  (*)\n"
+    for domarch in dom:
+        arch = str(domarch.attributes["name"].value);
+        atype = str(domarch.attributes["type"].value);
+        flag = domarch.getElementsByTagName("flag");
+        flag = str(flag[0].firstChild.data);
+        if atype == "all":
+            tempstring = tempstring + "    for i in $cf_with_lv_arch\n"
+            tempstring = tempstring + "    do\n"
+            tempstring = tempstring + "      if test \"X$i\" = X" + arch + "; then\n";
+            tempstring = tempstring + "        indLV_ARCH=yes\n"
+            tempstring = tempstring + "      fi\n"
+            tempstring = tempstring + "    done\n"
+            tempstring = tempstring + "    if  test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+            tempstring = tempstring + "      indLV_ARCH=no\n"
+            tempstring = tempstring + "    fi\n"
+            tempstring = tempstring + "    if test \"$indLV_ARCH\" == \"yes\"; then\n"        
             tempstring = tempstring + "      AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n";
             tempstring = tempstring + "      LV_HAVE_" + arch.swapcase() + "=yes\n";
+            tempstring = tempstring + "      BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
+            tempstring = tempstring + "    fi\n"
+            tempstring = tempstring + "    indLV_ARCH=no\n"
     tempstring = tempstring + "  ;;\n"
     tempstring = tempstring + "  esac\n"
     tempstring = tempstring + "  LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n"
     tempstring = tempstring + "])\n"
    
     return tempstring;
-                
-                
+
         
diff --git a/volk/include/volk/volk_16s_branch_4_state_8_aligned16.h b/volk/include/volk/volk_16i_branch_4_state_8_a16.h
index fb9d7cb87..3437c1a6b 100644
--- a/volk/include/volk/volk_16s_branch_4_state_8_aligned16.h
+++ b/volk/include/volk/volk_16i_branch_4_state_8_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H
-#define INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_branch_4_state_8_a16_H
+#define INCLUDED_volk_16i_branch_4_state_8_a16_H
 
 
 #include<inttypes.h>
@@ -14,7 +14,7 @@
 #include<emmintrin.h>
 #include<tmmintrin.h>
 
-static inline  void volk_16s_branch_4_state_8_aligned16_ssse3(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
+static inline  void volk_16i_branch_4_state_8_a16_ssse3(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
 	
   
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11;
@@ -138,7 +138,7 @@ static inline  void volk_16s_branch_4_state_8_aligned16_ssse3(short* target,  sh
 #endif /*LV_HAVE_SSEs*/
 
 #if LV_HAVE_GENERIC
-static inline  void volk_16s_branch_4_state_8_aligned16_generic(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
+static inline  void volk_16i_branch_4_state_8_a16_generic(short* target,  short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
 	int i = 0;
 	
 	int bound = 4;
@@ -191,4 +191,4 @@ static inline  void volk_16s_branch_4_state_8_aligned16_generic(short* target,
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H*/
+#endif /*INCLUDED_volk_16i_branch_4_state_8_a16_H*/
diff --git a/volk/include/volk/volk_16s_convert_8s_aligned16.h b/volk/include/volk/volk_16i_convert_8i_a16.h
index 64c368688..73e45ad63 100644
--- a/volk/include/volk/volk_16s_convert_8s_aligned16.h
+++ b/volk/include/volk/volk_16i_convert_8i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H
-#define INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_convert_8i_a16_H
+#define INCLUDED_volk_16i_convert_8i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param outputVector The 8 bit output data buffer
   \param num_points The number of data values to be converted
 */
-static inline void volk_16s_convert_8s_aligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
+static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
     
@@ -52,7 +52,7 @@ static inline void volk_16s_convert_8s_aligned16_sse2(int8_t* outputVector, cons
   \param outputVector The 8 bit output data buffer
   \param num_points The number of data values to be converted
 */
-static inline void volk_16s_convert_8s_aligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
+static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
   int8_t* outputVectorPtr = outputVector;
   const int16_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -66,4 +66,4 @@ static inline void volk_16s_convert_8s_aligned16_generic(int8_t* outputVector, c
 
 
 
-#endif /* INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H */
+#endif /* INCLUDED_volk_16i_convert_8i_a16_H */
diff --git a/volk/include/volk/volk_16s_convert_8s_unaligned16.h b/volk/include/volk/volk_16i_convert_8i_u.h
index ca925de86..5fc792b56 100644
--- a/volk/include/volk/volk_16s_convert_8s_unaligned16.h
+++ b/volk/include/volk/volk_16i_convert_8i_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H
-#define INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H
+#ifndef INCLUDED_volk_16i_convert_8i_u_H
+#define INCLUDED_volk_16i_convert_8i_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param num_points The number of data values to be converted
   \note Input and output buffers do NOT need to be properly aligned
 */
-static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
+static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
     
@@ -54,7 +54,7 @@ static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, co
   \param num_points The number of data values to be converted
   \note Input and output buffers do NOT need to be properly aligned
 */
-static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
+static inline void volk_16i_convert_8i_u_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
   int8_t* outputVectorPtr = outputVector;
   const int16_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -68,4 +68,4 @@ static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H */
+#endif /* INCLUDED_volk_16i_convert_8i_u_H */
diff --git a/volk/include/volk/volk_16s_max_star_aligned16.h b/volk/include/volk/volk_16i_max_star_16i_a16.h
index ba4e979ec..ff57bd2a1 100644
--- a/volk/include/volk/volk_16s_max_star_aligned16.h
+++ b/volk/include/volk/volk_16i_max_star_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H
-#define INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_max_star_16i_a16_H
+#define INCLUDED_volk_16i_max_star_16i_a16_H
 
 
 #include<inttypes.h>
@@ -12,7 +12,7 @@
 #include<emmintrin.h>
 #include<tmmintrin.h>
 
-static inline  void volk_16s_max_star_aligned16_ssse3(short* target, short* src0, unsigned int num_bytes) {
+static inline  void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, unsigned int num_bytes) {
 
 
   
@@ -87,7 +87,7 @@ static inline  void volk_16s_max_star_aligned16_ssse3(short* target, short* src0
 
 #if LV_HAVE_GENERIC
 
-static inline void volk_16s_max_star_aligned16_generic(short* target, short* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -105,4 +105,4 @@ static inline void volk_16s_max_star_aligned16_generic(short* target, short* src
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H*/
+#endif /*INCLUDED_volk_16i_max_star_16i_a16_H*/
diff --git a/volk/include/volk/volk_16s_max_star_horizontal_aligned16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h
index 82d011677..695e08dbf 100644
--- a/volk/include/volk/volk_16s_max_star_horizontal_aligned16.h
+++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H
-#define INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a16_H
+#define INCLUDED_volk_16i_max_star_horizontal_16i_a16_H
 
 
 #include<inttypes.h>
@@ -12,7 +12,7 @@
 #include<emmintrin.h>
 #include<tmmintrin.h>
 
-static inline  void volk_16s_max_star_horizontal_aligned16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) {
+static inline  void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) {
 
   const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
   const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d};
@@ -110,7 +110,7 @@ static inline  void volk_16s_max_star_horizontal_aligned16_ssse3(int16_t* target
 
 
 #if LV_HAVE_GENERIC
-static inline void volk_16s_max_star_horizontal_aligned16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) {
+static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -127,4 +127,4 @@ static inline void volk_16s_max_star_horizontal_aligned16_generic(int16_t* targe
 
 #endif /*LV_HAVE_GENERIC*/
 
-#endif /*INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H*/
+#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a16_H*/
diff --git a/volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h
index 452d05c4f..e52a949fb 100644
--- a/volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h
+++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
-#define INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a16_H
+#define INCLUDED_volk_16i_permute_and_scalar_add_a16_H
 
 
 #include<inttypes.h>
@@ -13,7 +13,7 @@
 #include<xmmintrin.h>
 #include<emmintrin.h>
 
-static inline  void volk_16s_permute_and_scalar_add_aligned16_sse2(short* target,  short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
+static inline  void volk_16i_permute_and_scalar_add_a16_sse2(short* target,  short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
 	
 
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
@@ -117,7 +117,7 @@ static inline  void volk_16s_permute_and_scalar_add_aligned16_sse2(short* target
 
 
 #if LV_HAVE_GENERIC
-static inline void volk_16s_permute_and_scalar_add_aligned16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
+static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -136,4 +136,4 @@ static inline void volk_16s_permute_and_scalar_add_aligned16_generic(short* targ
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H*/
+#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a16_H*/
diff --git a/volk/include/volk/volk_16s_convert_32f_aligned16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h
index 126ce1528..83fd26ff9 100644
--- a/volk/include/volk/volk_16s_convert_32f_aligned16.h
+++ b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H
-#define INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_s32f_convert_32f_a16_H
+#define INCLUDED_volk_16i_s32f_convert_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_16s_convert_32f_aligned16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int eighthPoints = num_points / 8;
     
@@ -68,7 +68,7 @@ static inline void volk_16s_convert_32f_aligned16_sse4_1(float* outputVector, co
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_16s_convert_32f_aligned16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -102,7 +102,7 @@ static inline void volk_16s_convert_32f_aligned16_sse(float* outputVector, const
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_16s_convert_32f_aligned16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int16_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -116,4 +116,4 @@ static inline void volk_16s_convert_32f_aligned16_generic(float* outputVector, c
 
 
 
-#endif /* INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_16i_s32f_convert_32f_a16_H */
diff --git a/volk/include/volk/volk_16s_convert_32f_unaligned16.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h
index d6212fba5..8f0dd0083 100644
--- a/volk/include/volk/volk_16s_convert_32f_unaligned16.h
+++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H
-#define INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H
+#ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H
+#define INCLUDED_volk_16i_s32f_convert_32f_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -15,7 +15,7 @@
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_16s_convert_32f_unaligned16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int eighthPoints = num_points / 8;
     
@@ -70,7 +70,7 @@ static inline void volk_16s_convert_32f_unaligned16_sse4_1(float* outputVector,
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_16s_convert_32f_unaligned16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -105,7 +105,7 @@ static inline void volk_16s_convert_32f_unaligned16_sse(float* outputVector, con
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_16s_convert_32f_unaligned16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_16i_s32f_convert_32f_u_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int16_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -119,4 +119,4 @@ static inline void volk_16s_convert_32f_unaligned16_generic(float* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H */
+#endif /* INCLUDED_volk_16i_s32f_convert_32f_u_H */
diff --git a/volk/include/volk/volk_16s_quad_max_star_aligned16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h
index 1004c4d23..e4ec5ab4e 100644
--- a/volk/include/volk/volk_16s_quad_max_star_aligned16.h
+++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H
-#define INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H
+#define INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H
 
 
 #include<inttypes.h>
@@ -13,7 +13,7 @@
 
 #include<emmintrin.h>
 
-static inline  void volk_16s_quad_max_star_aligned16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
+static inline  void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
 	
 
 
@@ -96,9 +96,9 @@ static inline  void volk_16s_quad_max_star_aligned16_sse2(short* target, short*
 
 	/*asm volatile
 		(
-		 "volk_16s_quad_max_star_aligned16_sse2_L1:\n\t"
+		 "volk_16i_x4_quad_max_star_16i_a16_sse2_L1:\n\t"
 		 "cmp $0, %[bound]\n\t"
-		 "je volk_16s_quad_max_star_aligned16_sse2_END\n\t"
+		 "je volk_16i_x4_quad_max_star_16i_a16_sse2_END\n\t"
 
 		 "movaps (%[src0]), %%xmm1\n\t"
 		 "movaps (%[src1]), %%xmm2\n\t"
@@ -143,9 +143,9 @@ static inline  void volk_16s_quad_max_star_aligned16_sse2(short* target, short*
 
 		 "movaps %%xmm1, (%[target])\n\t"
 		 "addw $16, %[target]\n\t"
-		 "jmp volk_16s_quad_max_star_aligned16_sse2_L1\n\t"
+		 "jmp volk_16i_x4_quad_max_star_16i_a16_sse2_L1\n\t"
 		 
-		 "volk_16s_quad_max_star_aligned16_sse2_END:\n\t"
+		 "volk_16i_x4_quad_max_star_16i_a16_sse2_END:\n\t"
 		 :
 		 :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target)
 		 :
@@ -168,7 +168,7 @@ static inline  void volk_16s_quad_max_star_aligned16_sse2(short* target, short*
 
 
 #if LV_HAVE_GENERIC
-static inline void volk_16s_quad_max_star_aligned16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
+static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -188,4 +188,4 @@ static inline void volk_16s_quad_max_star_aligned16_generic(short* target, short
 
 #endif /*LV_HAVE_GENERIC*/
 
-#endif /*INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H*/
+#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H*/
diff --git a/volk/include/volk/volk_16s_add_quad_aligned16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h
index 63042bef1..5744ca3a6 100644
--- a/volk/include/volk/volk_16s_add_quad_aligned16.h
+++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H
-#define INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H
+#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H
+#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H
 
 
 #include<inttypes.h>
@@ -13,7 +13,7 @@
 #include<xmmintrin.h>
 #include<emmintrin.h>
 
-static inline  void volk_16s_add_quad_aligned16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
+static inline  void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
   
   __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
   __m128i *p_target0, *p_target1, *p_target2, *p_target3,  *p_src0, *p_src1, *p_src2, *p_src3, *p_src4;
@@ -65,9 +65,9 @@ static inline  void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ
   }
     /*asm volatile
 		(
-		 ".%=volk_16s_add_quad_aligned16_sse2_L1:\n\t"
+		 ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1:\n\t"
 		 "cmp $0, %[bound]\n\t"
-		 "je .%=volk_16s_add_quad_aligned16_sse2_END\n\t"
+		 "je .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END\n\t"
 		 "movaps (%[src0]), %%xmm1\n\t"
 		 "movaps (%[src1]), %%xmm2\n\t"
 		 "movaps (%[src2]), %%xmm3\n\t"
@@ -91,8 +91,8 @@ static inline  void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ
 		 "add $16, %[target1]\n\t"
 		 "add $16, %[target2]\n\t"
 		 "add $16, %[target3]\n\t"
-		 "jmp .%=volk_16s_add_quad_aligned16_sse2_L1\n\t"
-		 ".%=volk_16s_add_quad_aligned16_sse2_END:\n\t"
+		 "jmp .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1\n\t"
+		 ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END:\n\t"
 		 :
 		 :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3)
 		 :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
@@ -113,7 +113,7 @@ static inline  void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ
 
 #if LV_HAVE_GENERIC
 
-static inline void volk_16s_add_quad_aligned16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
+static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) {
 	
 	int i = 0;
 	
@@ -133,4 +133,4 @@ static inline void volk_16s_add_quad_aligned16_generic(short* target0, short* ta
 
 
 
-#endif /*INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H*/
+#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H*/
diff --git a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h
index 32e13df98..7e08bf182 100644
--- a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
+++ b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H
+#define INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_16s_aligned16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -61,7 +61,7 @@ static inline void volk_16sc_deinterleave_16s_aligned16_ssse3(int16_t* iBuffer,
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_16s_aligned16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -128,7 +128,7 @@ static inline void volk_16sc_deinterleave_16s_aligned16_sse2(int16_t* iBuffer, i
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
   int16_t* qBufferPtr = qBuffer;
@@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Deinterleaves the complex 16 bit vector into I & Q vector data; this wrapper only forwards its arguments, unchanged, to the extern volk_16ic_deinterleave_16i_x2_a16_orc_impl
+  \param complexVector The complex input vector
+  \param iBuffer The I buffer output data
+  \param qBuffer The Q buffer output data
+  \param num_points The number of complex data values to be deinterleaved
+*/
+extern void volk_16ic_deinterleave_16i_x2_a16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16ic_deinterleave_16i_x2_a16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+    volk_16ic_deinterleave_16i_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h
index b594c85b8..388c00592 100644
--- a/volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h
+++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a16_H
+#define INCLUDED_volk_16ic_deinterleave_real_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_16s_aligned16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -55,7 +55,7 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_ssse3(int16_t* iBuf
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_16s_aligned16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -103,7 +103,7 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_sse2(int16_t* iBuff
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (int16_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -117,4 +117,4 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_generic(int16_t* iB
 
 
 
-#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h
index c0d1e941a..55a25702e 100644
--- a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
+++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a16_H
+#define INCLUDED_volk_16ic_deinterleave_real_8i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
@@ -53,7 +53,7 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffe
   number = sixteenthPoints * 16;
   int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
   for(; number < num_points; number++){
-    *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ / 256));
+    *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
     int16ComplexVectorPtr++;
   }
 }
@@ -66,18 +66,29 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffe
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
-  const int16_t* complexVectorPtr = (int16_t*)complexVector;
+  int16_t* complexVectorPtr = (int16_t*)complexVector; /* NOTE(review): the removed line declared this pointer const int16_t*; const is dropped here although the data is only read */
   int8_t* iBufferPtr = iBuffer;
   for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (int8_t)(*complexVectorPtr++ / 256);
+    *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8)); /* NOTE(review): the removed "/ 256" truncated toward zero; >> on a negative value is implementation-defined in C - confirm the rounding change for negative samples is intended */
     complexVectorPtr++;
   }
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data; this wrapper only forwards its arguments, unchanged, to the extern volk_16ic_deinterleave_real_8i_a16_orc_impl
+  \param complexVector The complex input vector
+  \param iBuffer The I buffer output data
+  \param num_points The number of complex data values to be deinterleaved
+*/
+extern void volk_16ic_deinterleave_real_8i_a16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16ic_deinterleave_real_8i_a16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+    volk_16ic_deinterleave_real_8i_a16_orc_impl(iBuffer, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a16_H */
diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16ic_magnitude_16i_a16.h
index 1482ab82e..bdcace750 100644
--- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
+++ b/volk/include/volk/volk_16ic_magnitude_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H
+#define INCLUDED_volk_16ic_magnitude_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16sc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
    
@@ -84,7 +84,7 @@ static inline void volk_16sc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVect
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16sc_magnitude_16s_aligned16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -160,11 +160,11 @@ static inline void volk_16sc_magnitude_16s_aligned16_sse(int16_t* magnitudeVecto
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   int16_t* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
-  const float scalar = 32786.0;
+  const float scalar = 32768.0;
   for(number = 0; number < num_points; number++){
     float real = ((float)(*complexVectorPtr++)) / scalar;
     float imag = ((float)(*complexVectorPtr++)) / scalar;
@@ -173,7 +173,18 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC_DISABLED
+/*!
+  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+  \param complexVector The vector containing the complex input values
+  \param magnitudeVector The vector containing the real output values
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+*/
+extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
+static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+    volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); /* 32768.0 is the same scalar constant the _generic variant uses; how the impl applies it is not visible here */
+}
+#endif /* LV_HAVE_ORC_DISABLED */
 
 
-
-#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h
index 86f67437d..606de2fc5 100644
--- a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H
+#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The data value to be divided against each input data value of the input complex vector
     \param num_points The number of complex data values to be deinterleaved
   */
-static inline void volk_16sc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
     float* iBufferPtr = iBuffer;
     float* qBufferPtr = qBuffer;
 
@@ -77,7 +77,7 @@ static inline void volk_16sc_deinterleave_32f_aligned16_sse(float* iBuffer, floa
     \param scalar The data value to be divided against each input data value of the input complex vector
     \param num_points The number of complex data values to be deinterleaved
   */
-static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer,
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+  /*!
+    \brief Converts the complex 16 bit vector into floats, scales each data point, and deinterleaves into I & Q vector data; this wrapper only forwards its arguments to the extern _orc_impl
+    \param complexVector The complex input vector
+    \param iBuffer The I buffer output data
+    \param qBuffer The Q buffer output data
+    \param scalar The data value to be divided against each input data value of the input complex vector
+    \param num_points The number of complex data values to be deinterleaved
+  */
+extern void volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+    volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h
index 3e7be1e64..62331e496 100644
--- a/volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h
+++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
+#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -61,7 +61,7 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuff
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -107,7 +107,7 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_sse(float* iBuffer,
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_16sc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* iBufferPtr = iBuffer;
@@ -122,4 +122,4 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_generic(float* iBuf
 
 
 
-#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */
diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h
index 9c2a48835..ae64efbeb 100644
--- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
+++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H
-#define INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H
+#define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -79,7 +79,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -148,7 +148,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector,
   \param scalar The data value to be divided against each input data value of the input complex vector
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
   float* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -161,7 +161,19 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC_DISABLED
+/*!
+  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+  \param complexVector The vector containing the complex input values
+  \param magnitudeVector The vector containing the real output values
+  \param scalar The data value to be divided against each input data value of the input complex vector
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+*/
+extern void volk_16ic_s32f_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16ic_s32f_magnitude_32f_a16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+    volk_16ic_s32f_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC_DISABLED */
 
 
-
-#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H */
diff --git a/volk/include/volk/volk_16u_byteswap_aligned16.h b/volk/include/volk/volk_16u_byteswap_a16.h
index 698e958e4..c8128dbab 100644
--- a/volk/include/volk/volk_16u_byteswap_aligned16.h
+++ b/volk/include/volk/volk_16u_byteswap_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H
-#define INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H
+#ifndef INCLUDED_volk_16u_byteswap_a16_H
+#define INCLUDED_volk_16u_byteswap_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_16u_byteswap_aligned16_sse2(uint16_t* intsToSwap, unsigned int num_points){
+static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int num_points){
   unsigned int number = 0;
   uint16_t* inputPtr = intsToSwap;
   __m128i input, left, right, output;
@@ -49,7 +49,7 @@ static inline void volk_16u_byteswap_aligned16_sse2(uint16_t* intsToSwap, unsign
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, unsigned int num_points){
+static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned int num_points){
   unsigned int point;
   uint16_t* inputPtr = intsToSwap;
   for(point = 0; point < num_points; point++){
@@ -61,5 +61,17 @@ static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, uns
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Byteswaps (in-place) an aligned vector of uint16_t's by forwarding to the extern volk_16u_byteswap_a16_orc_impl.
+  \param intsToSwap The vector of data to byte swap
+  \param num_points The number of data points
+*/
+extern void volk_16u_byteswap_a16_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
+static inline void volk_16u_byteswap_a16_orc(uint16_t* intsToSwap, unsigned int num_points){
+    volk_16u_byteswap_a16_orc_impl(intsToSwap, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
 
-#endif /* INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H */
+#endif /* INCLUDED_volk_16u_byteswap_a16_H */
diff --git a/volk/include/volk/volk_32f_accumulator_aligned16.h b/volk/include/volk/volk_32f_accumulator_s32f_a16.h
index 7e395cf50..4a3588e6d 100644
--- a/volk/include/volk/volk_32f_accumulator_aligned16.h
+++ b/volk/include/volk/volk_32f_accumulator_s32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H
-#define INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H
+#define INCLUDED_volk_32f_accumulator_s32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param inputBuffer The buffer of data to be accumulated
   \param num_points The number of values in inputBuffer to be accumulated
 */
-static inline void volk_32f_accumulator_aligned16_sse(float* result, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
@@ -49,7 +49,7 @@ static inline void volk_32f_accumulator_aligned16_sse(float* result, const float
   \param inputBuffer The buffer of data to be accumulated
   \param num_points The number of values in inputBuffer to be accumulated
 */
-static inline void volk_32f_accumulator_aligned16_generic(float* result, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const float* inputBuffer, unsigned int num_points){
   const float* aPtr = inputBuffer;
   unsigned int number = 0;
   float returnValue = 0;
@@ -64,4 +64,4 @@ static inline void volk_32f_accumulator_aligned16_generic(float* result, const f
 
 
 
-#endif /* INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_accumulator_s32f_a16_H */
diff --git a/volk/include/volk/volk_32f_convert_64f_aligned16.h b/volk/include/volk/volk_32f_convert_64f_a16.h
index 91a855813..c303dc118 100644
--- a/volk/include/volk/volk_32f_convert_64f_aligned16.h
+++ b/volk/include/volk/volk_32f_convert_64f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_convert_64f_a16_H
+#define INCLUDED_volk_32f_convert_64f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
     \param fVector The float vector values to be converted
     \param num_points The number of points in the two vectors to be converted
   */
-static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
+static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, con
   \param fVector The float vector values to be converted
   \param num_points The number of points in the two vectors to be converted
 */
-static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector, const float* inputVector, unsigned int num_points){
+static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const float* inputVector, unsigned int num_points){
   double* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_convert_64f_a16_H */
diff --git a/volk/include/volk/volk_32f_convert_64f_unaligned16.h b/volk/include/volk/volk_32f_convert_64f_u.h
index 698e0d446..a825767de 100644
--- a/volk/include/volk/volk_32f_convert_64f_unaligned16.h
+++ b/volk/include/volk/volk_32f_convert_64f_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H
+#ifndef INCLUDED_volk_32f_convert_64f_u_H
+#define INCLUDED_volk_32f_convert_64f_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
     \param fVector The float vector values to be converted
     \param num_points The number of points in the two vectors to be converted
   */
-static inline void volk_32f_convert_64f_unaligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
+static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_unaligned16_sse2(double* outputVector, c
   \param fVector The float vector values to be converted
   \param num_points The number of points in the two vectors to be converted
 */
-static inline void volk_32f_convert_64f_unaligned16_generic(double* outputVector, const float* inputVector, unsigned int num_points){
+static inline void volk_32f_convert_64f_u_generic(double* outputVector, const float* inputVector, unsigned int num_points){
   double* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_unaligned16_generic(double* outputVector
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H */
+#endif /* INCLUDED_volk_32f_convert_64f_u_H */
diff --git a/volk/include/volk/volk_32f_index_max_aligned16.h b/volk/include/volk/volk_32f_index_max_16u_a16.h
index 26322bfa2..d070e17d5 100644
--- a/volk/include/volk/volk_32f_index_max_aligned16.h
+++ b/volk/include/volk/volk_32f_index_max_16u_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H
-#define INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_index_max_16u_a16_H
+#define INCLUDED_volk_32f_index_max_16u_a16_H
 
 #include <volk/volk_common.h>
 #include <inttypes.h>
@@ -8,7 +8,7 @@
 #if LV_HAVE_SSE4_1
 #include<smmintrin.h>
 
-static inline void volk_32f_index_max_aligned16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
+static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
   if(num_points > 0){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
@@ -66,7 +66,7 @@ static inline void volk_32f_index_max_aligned16_sse4_1(unsigned int* target, con
 #if LV_HAVE_SSE
 #include<xmmintrin.h>
 
-static inline void volk_32f_index_max_aligned16_sse(unsigned int* target, const float* src0, unsigned int num_points) {
+static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) {
   if(num_points > 0){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
@@ -123,7 +123,7 @@ static inline void volk_32f_index_max_aligned16_sse(unsigned int* target, const
 #endif /*LV_HAVE_SSE*/
 
 #if LV_HAVE_GENERIC
-static inline void volk_32f_index_max_aligned16_generic(unsigned int* target, const float* src0, unsigned int num_points) {
+static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) {
   if(num_points > 0){
     float max = src0[0];
     unsigned int index = 0;
@@ -145,4 +145,4 @@ static inline void volk_32f_index_max_aligned16_generic(unsigned int* target, co
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/
diff --git a/volk/include/volk/volk_32f_fm_detect_aligned16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h
index c82239d74..ff4d5b19c 100644
--- a/volk/include/volk/volk_32f_fm_detect_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H
-#define INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H
+#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample.
   \param num_noints The number of real values in the input vector.
 */
-static inline void volk_32f_fm_detect_aligned16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
+static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
   if (num_points < 1) {
     return;
   }
@@ -87,7 +87,7 @@ static inline void volk_32f_fm_detect_aligned16_sse(float* outputVector, const f
   \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample.
   \param num_points The number of real values in the input vector.
 */
-static inline void volk_32f_fm_detect_aligned16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
+static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){
   if (num_points < 1) {
     return;
   }
@@ -117,4 +117,4 @@ static inline void volk_32f_fm_detect_aligned16_generic(float* outputVector, con
 
 
 
-#endif /* INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h
index ff917525f..168245d65 100644
--- a/volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-#define INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H
+#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -16,7 +16,7 @@
   \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20
   \param noiseFloorAmplitude The noise floor of the input spectrum, in dB
 */
-static inline void volk_32f_calc_spectral_noise_floor_aligned16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
+static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -127,7 +127,7 @@ static inline void volk_32f_calc_spectral_noise_floor_aligned16_sse(float* noise
   \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20
   \param noiseFloorAmplitude The noise floor of the input spectrum, in dB
 */
-static inline void volk_32f_calc_spectral_noise_floor_aligned16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
+static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){
   float sumMean = 0.0;
   unsigned int number;
   // find the sum (for mean), etc
@@ -164,4 +164,4 @@ static inline void volk_32f_calc_spectral_noise_floor_aligned16_generic(float* n
 
 
 
-#endif /* INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_convert_16s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h
index 7fbabd9c3..d6b16e336 100644
--- a/volk/include/volk/volk_32f_convert_16s_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H
+#define INCLUDED_volk_32f_s32f_convert_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_16s_aligned16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
@@ -53,7 +53,7 @@ static inline void volk_32f_convert_16s_aligned16_sse2(int16_t* outputVector, co
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_16s_aligned16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -93,7 +93,7 @@ static inline void volk_32f_convert_16s_aligned16_sse(int16_t* outputVector, con
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_16s_aligned16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int16_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -107,4 +107,4 @@ static inline void volk_32f_convert_16s_aligned16_generic(int16_t* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_16i_a16_H */
diff --git a/volk/include/volk/volk_32f_convert_16s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
index d2bbdf13a..4d306e53c 100644
--- a/volk/include/volk/volk_32f_convert_16s_unaligned16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
+#define INCLUDED_volk_32f_s32f_convert_16i_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_16s_unaligned16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int eighthPoints = num_points / 8;
@@ -55,7 +55,7 @@ static inline void volk_32f_convert_16s_unaligned16_sse2(int16_t* outputVector,
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_16s_unaligned16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -96,7 +96,7 @@ static inline void volk_32f_convert_16s_unaligned16_sse(int16_t* outputVector, c
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_16s_unaligned16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int16_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -110,4 +110,4 @@ static inline void volk_32f_convert_16s_unaligned16_generic(int16_t* outputVecto
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */
diff --git a/volk/include/volk/volk_32f_convert_32s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h
index 011ef5d0e..ae874fd7b 100644
--- a/volk/include/volk/volk_32f_convert_32s_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H
+#define INCLUDED_volk_32f_s32f_convert_32i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_32s_aligned16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -49,7 +49,7 @@ static inline void volk_32f_convert_32s_aligned16_sse2(int32_t* outputVector, co
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_32s_aligned16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -89,7 +89,7 @@ static inline void volk_32f_convert_32s_aligned16_sse(int32_t* outputVector, con
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_32s_aligned16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int32_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -103,4 +103,4 @@ static inline void volk_32f_convert_32s_aligned16_generic(int32_t* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_32i_a16_H */
diff --git a/volk/include/volk/volk_32f_convert_32s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
index a6df826c7..561fcd800 100644
--- a/volk/include/volk/volk_32f_convert_32s_unaligned16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
+#define INCLUDED_volk_32f_s32f_convert_32i_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_32s_unaligned16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -51,7 +51,7 @@ static inline void volk_32f_convert_32s_unaligned16_sse2(int32_t* outputVector,
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_32s_unaligned16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -92,7 +92,7 @@ static inline void volk_32f_convert_32s_unaligned16_sse(int32_t* outputVector, c
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_32s_unaligned16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int32_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -106,4 +106,4 @@ static inline void volk_32f_convert_32s_unaligned16_generic(int32_t* outputVecto
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
diff --git a/volk/include/volk/volk_32f_convert_8s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h
index b9487b622..f64f2a213 100644
--- a/volk/include/volk/volk_32f_convert_8s_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H
+#define INCLUDED_volk_32f_s32f_convert_8i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_8s_aligned16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int sixteenthPoints = num_points / 16;
@@ -60,7 +60,7 @@ static inline void volk_32f_convert_8s_aligned16_sse2(int8_t* outputVector, cons
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_8s_aligned16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -100,13 +100,13 @@ static inline void volk_32f_convert_8s_aligned16_sse(int8_t* outputVector, const
     \param scalar The value multiplied against each point in the input buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32f_convert_8s_aligned16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int8_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
 
   for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++  * scalar));
+    *outputVectorPtr++ = (int8_t)(*inputVectorPtr++  * scalar);
   }
 }
 #endif /* LV_HAVE_GENERIC */
@@ -114,4 +114,4 @@ static inline void volk_32f_convert_8s_aligned16_generic(int8_t* outputVector, c
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_8i_a16_H */
diff --git a/volk/include/volk/volk_32f_convert_8s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
index e986dbc87..420693571 100644
--- a/volk/include/volk/volk_32f_convert_8s_unaligned16.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H
-#define INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
+#define INCLUDED_volk_32f_s32f_convert_8i_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_8s_unaligned16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int sixteenthPoints = num_points / 16;
@@ -62,7 +62,7 @@ static inline void volk_32f_convert_8s_unaligned16_sse2(int8_t* outputVector, co
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_8s_unaligned16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -103,7 +103,7 @@ static inline void volk_32f_convert_8s_unaligned16_sse(int8_t* outputVector, con
     \param num_points The number of data values to be converted
     \note Input buffer does NOT need to be properly aligned
   */
-static inline void volk_32f_convert_8s_unaligned16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
   int8_t* outputVectorPtr = outputVector;
   const float* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -117,4 +117,4 @@ static inline void volk_32f_convert_8s_unaligned16_generic(int8_t* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */
diff --git a/volk/include/volk/volk_32f_normalize_aligned16.h b/volk/include/volk/volk_32f_s32f_normalize_a16.h
index 1aabb1d9d..0850cddf7 100644
--- a/volk/include/volk/volk_32f_normalize_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_normalize_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H
-#define INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_normalize_a16_H
+#define INCLUDED_volk_32f_s32f_normalize_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param num_points The number of values in vecBuffer
   \param scalar The scale value to be applied to each buffer value
 */
-static inline void volk_32f_normalize_aligned16_sse(float* vecBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   float* inputPtr = vecBuffer;
 
@@ -49,7 +49,7 @@ static inline void volk_32f_normalize_aligned16_sse(float* vecBuffer, const floa
   \param bVector One of the vectors to be normalizeed
   \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector
 */
-static inline void volk_32f_normalize_aligned16_generic(float* vecBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   float* inputPtr = vecBuffer;
   const float invScalar = 1.0 / scalar;
@@ -60,7 +60,22 @@ static inline void volk_32f_normalize_aligned16_generic(float* vecBuffer, const
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Normalizes a buffer in place through the external ORC routine, passing it the reciprocal of scalar
+  \param vecBuffer The buffer to normalize; handed to the ORC routine as both destination and source
+  \param scalar The normalization value; 1.0 / scalar is computed here and passed on (scalar == 0 is not checked)
+  \param num_points The number of values in vecBuffer
+  \note The ORC routine is only declared here; presumably it multiplies each value by the reciprocal - confirm against its definition
+*/
+extern void volk_32f_s32f_normalize_a16_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points);
+static inline void volk_32f_s32f_normalize_a16_orc(float* vecBuffer, const float scalar, unsigned int num_points){
+    float invscalar = 1.0 / scalar;
+    volk_32f_s32f_normalize_a16_orc_impl(vecBuffer, vecBuffer, invscalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
 
 
 
-#endif /* INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_normalize_a16_H */
diff --git a/volk/include/volk/volk_32f_power_aligned16.h b/volk/include/volk/volk_32f_s32f_power_32f_a16.h
index 2ecd8eecb..3ed594d9a 100644
--- a/volk/include/volk/volk_32f_power_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_power_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_POWER_ALIGNED16_H
-#define INCLUDED_VOLK_32f_POWER_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H
+#define INCLUDED_volk_32f_s32f_power_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -19,7 +19,7 @@
   \param power The power value to be applied to each data point
   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
 */
-static inline void volk_32f_power_aligned16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
+static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -76,7 +76,7 @@ static inline void volk_32f_power_aligned16_sse4_1(float* cVector, const float*
   \param power The power value to be applied to each data point
   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
 */
-static inline void volk_32f_power_aligned16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
+static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -127,7 +127,7 @@ static inline void volk_32f_power_aligned16_sse(float* cVector, const float* aVe
     \param power The power value to be applied to each data point
     \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
   */
-static inline void volk_32f_power_aligned16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
+static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
   float* cPtr = cVector;
   const float* aPtr = aVector;
   unsigned int number = 0;
@@ -141,4 +141,4 @@ static inline void volk_32f_power_aligned16_generic(float* cVector, const float*
 
 
 
-#endif /* INCLUDED_VOLK_32f_POWER_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_stddev_aligned16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
index 1c6a08437..32f4fa067 100644
--- a/volk/include/volk/volk_32f_stddev_aligned16.h
+++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H
-#define INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H
+#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param mean The mean of the input buffer
   \param num_points The number of values in input buffer to used in the stddev calculation
 */
-static inline void volk_32f_stddev_aligned16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
   float returnValue = 0;
   if(num_points > 0){
     unsigned int number = 0;
@@ -74,7 +74,7 @@ static inline void volk_32f_stddev_aligned16_sse4_1(float* stddev, const float*
   \param mean The mean of the input buffer
   \param num_points The number of values in input buffer to used in the stddev calculation
 */
-static inline void volk_32f_stddev_aligned16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
   float returnValue = 0;
   if(num_points > 0){
     unsigned int number = 0;
@@ -119,7 +119,7 @@ static inline void volk_32f_stddev_aligned16_sse(float* stddev, const float* inp
   \param mean The mean of the input buffer
   \param num_points The number of values in input buffer to used in the stddev calculation
 */
-static inline void volk_32f_stddev_aligned16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
+static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
   float returnValue = 0;
   if(num_points > 0){
     const float* aPtr = inputBuffer;
@@ -141,4 +141,4 @@ static inline void volk_32f_stddev_aligned16_generic(float* stddev, const float*
 
 
 
-#endif /* INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_sqrt_aligned16.h b/volk/include/volk/volk_32f_sqrt_32f_a16.h
index 0b2eaf251..513c2cffe 100644
--- a/volk/include/volk/volk_32f_sqrt_aligned16.h
+++ b/volk/include/volk/volk_32f_sqrt_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_SQRT_ALIGNED16_H
-#define INCLUDED_VOLK_32f_SQRT_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_sqrt_32f_a16_H
+#define INCLUDED_volk_32f_sqrt_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param aVector One of the vectors to be sqrted
   \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
 */
-static inline void volk_32f_sqrt_aligned16_sse(float* cVector, const float* aVector, unsigned int num_points){
+static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -47,7 +47,7 @@ static inline void volk_32f_sqrt_aligned16_sse(float* cVector, const float* aVec
   \param aVector One of the vectors to be sqrted
   \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
 */
-static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float* aVector, unsigned int num_points){
+static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     unsigned int number = 0;
@@ -58,7 +58,20 @@ static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float*
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int);
+/*!
+  \brief Takes the square root of each value in the input vector and stores the results in the output vector (forwards to volk_32f_sqrt_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector of values to take the square root of
+  \param num_points The number of values in aVector to be processed and stored into cVector
+*/
+static inline void volk_32f_sqrt_32f_a16_orc(float* cVector, const float* aVector, unsigned int num_points){
+    volk_32f_sqrt_32f_a16_orc_impl(cVector, aVector, num_points);
+}
+
+#endif /* LV_HAVE_ORC */
 
 
 
-#endif /* INCLUDED_VOLK_32f_SQRT_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_sqrt_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h
index 1cd502257..278089841 100644
--- a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h
+++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H
-#define INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
+#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param inputBuffer The buffer of points to calculate the std deviation for
   \param num_points The number of values in input buffer to used in the stddev and mean calculations
 */
-static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   float newMean = 0;
   if(num_points > 0){
@@ -87,7 +87,7 @@ static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, floa
   \param inputBuffer The buffer of points to calculate the std deviation for
   \param num_points The number of values in input buffer to used in the stddev and mean calculations
 */
-static inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   float newMean = 0;
   if(num_points > 0){
@@ -142,7 +142,7 @@ static inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float*
   \param inputBuffer The buffer of points to calculate the std deviation for
   \param num_points The number of values in input buffer to used in the stddev and mean calculations
 */
-static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
+static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
   float returnValue = 0;
   float newMean = 0;
   if(num_points > 0){
@@ -166,4 +166,4 @@ static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, flo
 
 
 
-#endif /* INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */
diff --git a/volk/include/volk/volk_32f_add_aligned16.h b/volk/include/volk/volk_32f_x2_add_32f_a16.h
index 721c60fd6..d0d0e0a0e 100644
--- a/volk/include/volk/volk_32f_add_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_add_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_ADD_ALIGNED16_H
-#define INCLUDED_VOLK_32f_ADD_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_add_32f_a16_H
+#define INCLUDED_volk_32f_x2_add_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors to be added
   \param num_points The number of values in aVector and bVector to be added together and stored into cVector
 */
-static inline void volk_32f_add_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_add_aligned16_sse(float* cVector, const float* aVect
   \param bVector One of the vectors to be added
   \param num_points The number of values in aVector and bVector to be added together and stored into cVector
 */
-static inline void volk_32f_add_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -63,7 +63,19 @@ static inline void volk_32f_add_aligned16_generic(float* cVector, const float* a
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Adds the two input vectors and stores the results in the third vector (forwards to volk_32f_x2_add_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be added
+  \param bVector One of the vectors to be added
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+extern void volk_32f_x2_add_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_add_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_add_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_32f_ADD_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_add_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_divide_aligned16.h b/volk/include/volk/volk_32f_x2_divide_32f_a16.h
index c00700cd8..d844e25b0 100644
--- a/volk/include/volk/volk_32f_divide_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_divide_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H
-#define INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_divide_32f_a16_H
+#define INCLUDED_volk_32f_x2_divide_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The divisor vector
   \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector
 */
-static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aV
   \param bVector The divisor vector
   \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector
 */
-static inline void volk_32f_divide_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -63,7 +63,20 @@ static inline void volk_32f_divide_aligned16_generic(float* cVector, const float
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Divides aVector by bVector element-wise and stores the results in cVector (forwards to volk_32f_x2_divide_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector The vector to be divided (the dividend)
+  \param bVector The divisor vector
+  \param num_points The number of values in aVector and bVector to be divided and stored into cVector
+*/
+extern void volk_32f_x2_divide_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_divide_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_divide_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
 
-#endif /* INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_divide_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_dot_prod_aligned16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h
index 3aee1136a..61aa56815 100644
--- a/volk/include/volk/volk_32f_dot_prod_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H
-#define INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H
+#define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H
 
 #include<stdio.h>
 
@@ -7,7 +7,7 @@
 #if LV_HAVE_GENERIC
 
 
-static inline void volk_32f_dot_prod_aligned16_generic(float * result, const float * input, const float * taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) {
 
   float dotProduct = 0;
   const float* aPtr = input;
@@ -27,7 +27,7 @@ static inline void volk_32f_dot_prod_aligned16_generic(float * result, const flo
 #if LV_HAVE_SSE
 
 
-static inline void volk_32f_dot_prod_aligned16_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
   
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
@@ -77,7 +77,7 @@ static inline void volk_32f_dot_prod_aligned16_sse( float* result, const  float*
 
 #include <pmmintrin.h>
 
-static inline void volk_32f_dot_prod_aligned16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) {
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -124,7 +124,7 @@ static inline void volk_32f_dot_prod_aligned16_sse3(float * result, const float
 
 #include <smmintrin.h>
 
-static inline void volk_32f_dot_prod_aligned16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) {
   unsigned int number = 0;
   const unsigned int sixteenthPoints = num_points / 16;
 
@@ -181,4 +181,4 @@ static inline void volk_32f_dot_prod_aligned16_sse4_1(float * result, const floa
 
 #endif /*LV_HAVE_SSE4_1*/
 
-#endif /*INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a16_H*/
diff --git a/volk/include/volk/volk_32f_dot_prod_unaligned16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
index bce6aa15f..8469a3cea 100644
--- a/volk/include/volk/volk_32f_dot_prod_unaligned16.h
+++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H
-#define INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_u_H
+#define INCLUDED_volk_32f_x2_dot_prod_32f_u_H
 
 #include<stdio.h>
 
@@ -7,7 +7,7 @@
 #if LV_HAVE_GENERIC
 
 
-static inline void volk_32f_dot_prod_unaligned16_generic(float * result, const float * input, const float * taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const float * input, const float * taps, unsigned int num_points) {
 
   float dotProduct = 0;
   const float* aPtr = input;
@@ -27,7 +27,7 @@ static inline void volk_32f_dot_prod_unaligned16_generic(float * result, const f
 #if LV_HAVE_SSE
 
 
-static inline void volk_32f_dot_prod_unaligned16_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const  float* input, const  float* taps, unsigned int num_points) {
   
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
@@ -77,7 +77,7 @@ static inline void volk_32f_dot_prod_unaligned16_sse( float* result, const  floa
 
 #include <pmmintrin.h>
 
-static inline void volk_32f_dot_prod_unaligned16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * input, const float * taps, unsigned int num_points) {
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -124,7 +124,7 @@ static inline void volk_32f_dot_prod_unaligned16_sse3(float * result, const floa
 
 #include <smmintrin.h>
 
-static inline void volk_32f_dot_prod_unaligned16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) {
+static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) {
   unsigned int number = 0;
   const unsigned int sixteenthPoints = num_points / 16;
 
@@ -181,4 +181,4 @@ static inline void volk_32f_dot_prod_unaligned16_sse4_1(float * result, const fl
 
 #endif /*LV_HAVE_SSE4_1*/
 
-#endif /*INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H*/
+#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_u_H*/
diff --git a/volk/include/volk/volk_32f_interleave_32fc_aligned16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h
index 859c6a0ef..29c9392df 100644
--- a/volk/include/volk/volk_32f_interleave_32fc_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H
-#define INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a16_H
+#define INCLUDED_volk_32f_x2_interleave_32fc_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param complexVector The complex output vector
   \param num_points The number of complex data values to be interleaved
 */
-static inline void volk_32f_interleave_32fc_aligned16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
+static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
   unsigned int number = 0;
   float* complexVectorPtr = (float*)complexVector;
   const float* iBufferPtr = iBuffer;
@@ -56,7 +56,7 @@ static inline void volk_32f_interleave_32fc_aligned16_sse(lv_32fc_t* complexVect
   \param complexVector The complex output vector
   \param num_points The number of complex data values to be interleaved
 */
-static inline void volk_32f_interleave_32fc_aligned16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
+static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){
   float* complexVectorPtr = (float*)complexVector;
   const float* iBufferPtr = iBuffer;
   const float* qBufferPtr = qBuffer;
@@ -72,4 +72,4 @@ static inline void volk_32f_interleave_32fc_aligned16_generic(lv_32fc_t* complex
 
 
 
-#endif /* INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a16_H */
diff --git a/volk/include/volk/volk_32f_max_aligned16.h b/volk/include/volk/volk_32f_x2_max_32f_a16.h
index 96aafb2bf..26e7f1246 100644
--- a/volk/include/volk/volk_32f_max_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_max_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_MAX_ALIGNED16_H
-#define INCLUDED_VOLK_32f_MAX_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H
+#define INCLUDED_volk_32f_x2_max_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -53,7 +53,7 @@ static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVect
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_max_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Selects the maximum of each pair of entries from aVector and bVector and stores the results in cVector (forwards to volk_32f_x2_max_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector The first vector to be compared
+  \param bVector The second vector to be compared
+  \param num_points The number of values in aVector and bVector to be compared and stored into cVector
+*/
+extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
 
-#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_min_aligned16.h b/volk/include/volk/volk_32f_x2_min_32f_a16.h
index e247f4213..23bae044c 100644
--- a/volk/include/volk/volk_32f_min_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_min_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_MIN_ALIGNED16_H
-#define INCLUDED_VOLK_32f_MIN_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_min_32f_a16_H
+#define INCLUDED_volk_32f_x2_min_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_min_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -53,7 +53,7 @@ static inline void volk_32f_min_aligned16_sse(float* cVector, const float* aVect
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_32f_min_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Selects the minimum of each pair of entries from aVector and bVector and stores the results in cVector (forwards to volk_32f_x2_min_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector The first vector to be compared
+  \param bVector The second vector to be compared
+  \param num_points The number of values in aVector and bVector to be compared and stored into cVector
+*/
+extern void volk_32f_x2_min_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_min_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_min_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
 
-#endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_min_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_multiply_aligned16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h
index b557580ab..a0dcfa86e 100644
--- a/volk/include/volk/volk_32f_multiply_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H
-#define INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_multiply_32f_a16_H
+#define INCLUDED_volk_32f_x2_multiply_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors to be multiplied
   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_32f_multiply_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_multiply_aligned16_sse(float* cVector, const float*
   \param bVector One of the vectors to be multiplied
   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_32f_multiply_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -63,7 +63,19 @@ static inline void volk_32f_multiply_aligned16_generic(float* cVector, const flo
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Multiplies the two input vectors element-wise and stores the results in the third vector (forwards to volk_32f_x2_multiply_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be multiplied
+  \param bVector One of the vectors to be multiplied
+  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+extern void volk_32f_x2_multiply_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_multiply_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_multiply_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_multiply_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h
index 476946b88..30306774d 100644
--- a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H
-#define INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
+#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The scaling value being multiplied against each data point
     \param num_points The number of complex data values to be interleaved
   */
-static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const float* iBufferPtr = iBuffer;
     const float* qBufferPtr = qBuffer;
@@ -72,7 +72,7 @@ static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVec
     \param scalar The scaling value being multiplied against each data point
     \param num_points The number of complex data values to be interleaved
   */
-static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const float* iBufferPtr = iBuffer;
     const float* qBufferPtr = qBuffer;
@@ -136,7 +136,7 @@ static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVect
     \param scalar The scaling value being multiplied against each data point
     \param num_points The number of complex data values to be interleaved
   */
-static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
+static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
   int16_t* complexVectorPtr = (int16_t*)complexVector;
   const float* iBufferPtr = iBuffer;
   const float* qBufferPtr = qBuffer;
@@ -152,4 +152,4 @@ static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complex
 
 
 
-#endif /* INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */
diff --git a/volk/include/volk/volk_32f_subtract_aligned16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h
index ac3f5e5d1..7404bfe79 100644
--- a/volk/include/volk/volk_32f_subtract_aligned16.h
+++ b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H
-#define INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x2_subtract_32f_a16_H
+#define INCLUDED_volk_32f_x2_subtract_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be subtracted
   \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
 */
-static inline void volk_32f_subtract_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32f_subtract_aligned16_sse(float* cVector, const float*
   \param bVector The vector to be subtracted
   \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
 */
-static inline void volk_32f_subtract_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
     float* cPtr = cVector;
     const float* aPtr = aVector;
     const float* bPtr=  bVector;
@@ -63,5 +63,19 @@ static inline void volk_32f_subtract_aligned16_generic(float* cVector, const flo
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Subtracts bVector from aVector and stores the results in cVector (forwards to volk_32f_x2_subtract_32f_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector The initial vector
+  \param bVector The vector to be subtracted
+  \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
+*/
+extern void volk_32f_x2_subtract_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_x2_subtract_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+    volk_32f_x2_subtract_32f_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
 
-#endif /* INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H */
+#endif /* INCLUDED_volk_32f_x2_subtract_32f_a16_H */
diff --git a/volk/include/volk/volk_32f_sum_of_poly_aligned16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h
index a326e62b1..af9e39537 100644
--- a/volk/include/volk/volk_32f_sum_of_poly_aligned16.h
+++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H
-#define INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H
+#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H
+#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H
 
 #include<inttypes.h>
 #include<stdio.h>
@@ -13,7 +13,7 @@
 #include<xmmintrin.h>
 #include<pmmintrin.h>
 
-static inline void volk_32f_sum_of_poly_aligned16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
+static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
   
   
   float result = 0.0;
@@ -100,7 +100,7 @@ static inline void volk_32f_sum_of_poly_aligned16_sse3(float* target, float* src
 
 #if LV_HAVE_GENERIC
 
-static inline void volk_32f_sum_of_poly_aligned16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
+static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) {
 
 
     
@@ -148,4 +148,4 @@ static inline void volk_32f_sum_of_poly_aligned16_generic(float* target, float*
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H*/
diff --git a/volk/include/volk/volk_32fc_32f_multiply_aligned16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h
index 436656ca0..514998800 100644
--- a/volk/include/volk/volk_32fc_32f_multiply_aligned16.h
+++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a16_H
+#define INCLUDED_volk_32fc_32f_multiply_32fc_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param bVector The vectors containing the float values to be multiplied against each complex value in aVector
     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_32f_multiply_aligned16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -64,7 +64,7 @@ static inline void volk_32fc_32f_multiply_aligned16_sse(lv_32fc_t* cVector, cons
     \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
     \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_32f_multiply_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
   lv_32fc_t* cPtr = cVector;
   const lv_32fc_t* aPtr = aVector;
   const float* bPtr=  bVector;
@@ -76,7 +76,20 @@ static inline void volk_32fc_32f_multiply_aligned16_generic(lv_32fc_t* cVector,
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+  /*!
+    \brief Multiplies the input complex vector with the input float vector and stores the results in the third vector (forwards to volk_32fc_32f_multiply_32fc_a16_orc_impl)
+    \param cVector The vector where the results will be stored
+    \param aVector The complex vector to be multiplied
+    \param bVector The vector containing the float values to be multiplied against each complex value in aVector
+    \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+  */
+extern void volk_32fc_32f_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32fc_32f_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+    volk_32fc_32f_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
 
-#endif /* INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a16_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h
index 02085cd1e..84d2576ed 100644
--- a/volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H
+#define INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -57,7 +57,7 @@ static inline void volk_32fc_deinterleave_32f_aligned16_sse(float* iBuffer, floa
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -72,4 +72,4 @@ static inline void volk_32fc_deinterleave_32f_aligned16_generic(float* iBuffer,
 
 
 
-#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h
index 3d9ebccdd..34262a7af 100644
--- a/volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H
+#define INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_64f_aligned16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
 
     const float* complexVectorPtr = (float*)complexVector;
@@ -59,7 +59,7 @@ static inline void volk_32fc_deinterleave_64f_aligned16_sse2(double* iBuffer, do
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_64f_aligned16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const float* complexVectorPtr = (float*)complexVector;
   double* iBufferPtr = iBuffer;
@@ -75,4 +75,4 @@ static inline void volk_32fc_deinterleave_64f_aligned16_generic(double* iBuffer,
 
 
 
-#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h
index 2af973bcc..9838ec88b 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a16_H
+#define INCLUDED_volk_32fc_deinterleave_real_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_real_32f_aligned16_sse(float* iBuffer,
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const float* complexVectorPtr = (float*)complexVector;
   float* iBufferPtr = iBuffer;
@@ -65,4 +65,4 @@ static inline void volk_32fc_deinterleave_real_32f_aligned16_generic(float* iBuf
 
 
 
-#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a16_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h
index f408589c4..af392d074 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h
+++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a16_H
+#define INCLUDED_volk_32fc_deinterleave_real_64f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_64f_aligned16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
 
   const float* complexVectorPtr = (float*)complexVector;
@@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_real_64f_aligned16_sse2(double* iBuffe
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_64f_aligned16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const float* complexVectorPtr = (float*)complexVector;
   double* iBufferPtr = iBuffer;
@@ -63,4 +63,4 @@ static inline void volk_32fc_deinterleave_real_64f_aligned16_generic(double* iBu
 
 
 
-#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a16_H */
diff --git a/volk/include/volk/volk_32fc_index_max_aligned16.h b/volk/include/volk/volk_32fc_index_max_16u_a16.h
index d77a95f90..532ae4e7c 100644
--- a/volk/include/volk/volk_32fc_index_max_aligned16.h
+++ b/volk/include/volk/volk_32fc_index_max_16u_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H
-#define INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_index_max_16u_a16_H
+#define INCLUDED_volk_32fc_index_max_16u_a16_H
 
 #include <volk/volk_common.h>
 #include<inttypes.h>
@@ -11,7 +11,7 @@
 #include<pmmintrin.h>
 
 
-static inline void volk_32fc_index_max_aligned16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
+static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
   
   
   
@@ -189,7 +189,7 @@ static inline void volk_32fc_index_max_aligned16_sse3(unsigned int* target, lv_3
 #endif /*LV_HAVE_SSE3*/
 
 #if LV_HAVE_GENERIC
-static inline void volk_32fc_index_max_aligned16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
+static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) {
   float sq_dist = 0.0;
   float max = 0.0;
   unsigned int index = 0;
@@ -212,4 +212,4 @@ static inline void volk_32fc_index_max_aligned16_generic(unsigned int* target, l
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32fc_index_max_16u_a16_H*/
diff --git a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h b/volk/include/volk/volk_32fc_magnitude_32f_a16.h
index 7a8fd1ef9..be7216dce 100644
--- a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_magnitude_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_magnitude_32f_a16_H
+#define INCLUDED_volk_32fc_magnitude_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-static inline void volk_32fc_magnitude_32f_aligned16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -56,7 +56,7 @@ static inline void volk_32fc_magnitude_32f_aligned16_sse3(float* magnitudeVector
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-static inline void volk_32fc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -103,7 +103,7 @@ static inline void volk_32fc_magnitude_32f_aligned16_sse(float* magnitudeVector,
     \param magnitudeVector The vector containing the real output values
     \param num_points The number of complex values in complexVector to be calculated and stored into cVector
   */
-static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -115,7 +115,18 @@ static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVec
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+  /*!
+    \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector (forwards to volk_32fc_magnitude_32f_a16_orc_impl)
+    \param complexVector The vector containing the complex input values
+    \param magnitudeVector The vector containing the real output values
+    \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  */
+extern void volk_32fc_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points);
+static inline void volk_32fc_magnitude_32f_a16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+    volk_32fc_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_magnitude_32f_a16_H */
diff --git a/volk/include/volk/volk_32fc_atan2_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h
index df0ebb987..e9f74438d 100644
--- a/volk/include/volk/volk_32fc_atan2_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a16_H
+#define INCLUDED_volk_32fc_s32f_atan2_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -19,7 +19,7 @@
   \param normalizeFactor The atan2 results will be divided by this normalization factor.
   \param num_points The number of complex values in the input vector.
 */
-static inline void volk_32fc_atan2_32f_aligned16_sse4_1(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* outPtr = outputVector;
 
@@ -81,7 +81,7 @@ static inline void volk_32fc_atan2_32f_aligned16_sse4_1(float* outputVector,  co
   \param normalizeFactor The atan2 results will be divided by this normalization factor.
   \param num_points The number of complex values in the input vector.
 */
-static inline void volk_32fc_atan2_32f_aligned16_sse(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   float* outPtr = outputVector;
 
@@ -139,7 +139,7 @@ static inline void volk_32fc_atan2_32f_aligned16_sse(float* outputVector,  const
   \param normalizeFactor The atan2 results will be divided by this normalization factor.
   \param num_points The number of complex values in the input vector.
 */
-static inline void volk_32fc_atan2_32f_aligned16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
   float* outPtr = outputVector;
   const float* inPtr = (float*)inputVector;
   const float invNormalizeFactor = 1.0 / normalizeFactor;
@@ -155,4 +155,4 @@ static inline void volk_32fc_atan2_32f_aligned16_generic(float* outputVector, co
 
 
 
-#endif /* INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a16_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h
index 3026b2422..31465bff9 100644
--- a/volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h
+++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H
+#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_16s_aligned16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -62,7 +62,7 @@ static inline void volk_32fc_deinterleave_real_16s_aligned16_sse(int16_t* iBuffe
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_32fc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   int16_t* iBufferPtr = iBuffer;
   unsigned int number = 0;
@@ -77,4 +77,4 @@ static inline void volk_32fc_deinterleave_real_16s_aligned16_generic(int16_t* iB
 
 
 
-#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h
index 4e64d8c22..530359600 100644
--- a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h
+++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H
+#define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_32fc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -69,7 +69,7 @@ static inline void volk_32fc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVect
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_32fc_magnitude_16s_aligned16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -128,7 +128,7 @@ static inline void volk_32fc_magnitude_16s_aligned16_sse(int16_t* magnitudeVecto
   \param magnitudeVector The vector containing the real output values
   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
 */
-static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
   const float* complexVectorPtr = (float*)complexVector;
   int16_t* magnitudeVectorPtr = magnitudeVector;
   unsigned int number = 0;
@@ -140,7 +140,19 @@ static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector (forwards to volk_32fc_s32f_magnitude_16i_a16_orc_impl)
+  \param complexVector The vector containing the complex input values
+  \param scalar The scale value multiplied to the magnitude of each complex vector
+  \param magnitudeVector The vector containing the real output values
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+*/
+extern void volk_32fc_s32f_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_32fc_s32f_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+    volk_32fc_s32f_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H */
diff --git a/volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h
index 2d71ee4f8..3507fdb3c 100644
--- a/volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h
+++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a16_H
+#define INCLUDED_volk_32fc_s32f_power_32fc_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -18,7 +18,7 @@
   \param power The power value to be applied to each data point
   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
 */
-static inline void volk_32fc_32f_power_32fc_aligned16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
+static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
   
@@ -89,7 +89,7 @@ static inline void volk_32fc_32f_power_32fc_aligned16_sse(lv_32fc_t* cVector, co
     \param power The power value to be applied to each data point
     \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
   */
-static inline void volk_32fc_32f_power_32fc_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
+static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){
   lv_32fc_t* cPtr = cVector;
   const lv_32fc_t* aPtr = aVector;
   unsigned int number = 0;
@@ -106,4 +106,4 @@ static inline void volk_32fc_32f_power_32fc_aligned16_generic(lv_32fc_t* cVector
 
 
 
-#endif /* INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a16_H */
diff --git a/volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h
index 645629b9d..39d8f7aa2 100644
--- a/volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H
+#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -19,7 +19,7 @@
   \param normalizationFactor This value is divided against all the input values before the power is calculated
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_power_spectrum_32f_aligned16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
   const float* inputPtr = (const float*)complexFFTInput;
   float* destPtr = logPowerOutput;
   uint64_t number = 0;
@@ -96,7 +96,7 @@ static inline void volk_32fc_power_spectrum_32f_aligned16_sse3(float* logPowerOu
   \param normalizationFactor This value is divided agains all the input values before the power is calculated
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_power_spectrum_32f_aligned16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
+static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){
   // Calculate the Power of the complex point
   const float* inputPtr = (float*)complexFFTInput;
   float* realFFTDataPointsPtr = logPowerOutput;
@@ -123,4 +123,4 @@ static inline void volk_32fc_power_spectrum_32f_aligned16_generic(float* logPowe
 
 
 
-#endif /* INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H */
diff --git a/volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h
index 52ec0f95b..0120b5307 100644
--- a/volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H
+#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -20,7 +20,7 @@
   \param rbw The resolution bandwith of the fft spectrum
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_power_spectral_density_32f_aligned16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
+static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
   const float* inputPtr = (const float*)complexFFTInput;
   float* destPtr = logPowerOutput;
   uint64_t number = 0;
@@ -103,7 +103,7 @@ static inline void volk_32fc_power_spectral_density_32f_aligned16_sse3(float* lo
   \param rbw The resolution bandwith of the fft spectrum
   \param num_points The number of fft data points
 */
-static inline void volk_32fc_power_spectral_density_32f_aligned16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
+static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){
   // Calculate the Power of the complex point
   const float* inputPtr = (float*)complexFFTInput;
   float* realFFTDataPointsPtr = logPowerOutput;
@@ -131,4 +131,4 @@ static inline void volk_32fc_power_spectral_density_32f_aligned16_generic(float*
 
 
 
-#endif /* INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H */
diff --git a/volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h
index 60103c1b5..a01971df3 100644
--- a/volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h
+++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H
+#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H
 
 #include<volk/volk_complex.h>
 #include<stdio.h>
@@ -8,7 +8,7 @@
 #if LV_HAVE_GENERIC
 
 
-static inline void volk_32fc_conjugate_dot_prod_aligned16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   float * res = (float*) result;
   float * in = (float*) input;
@@ -62,7 +62,7 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_generic(lv_32fc_t* res
 #if LV_HAVE_SSE && LV_HAVE_64
 
 
-static inline void volk_32fc_conjugate_dot_prod_aligned16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
   
@@ -203,7 +203,7 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_sse(lv_32fc_t* result,
 #endif
 
 #if LV_HAVE_SSE && LV_HAVE_32
-static inline void volk_32fc_conjugate_dot_prod_aligned16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000};
 
@@ -341,4 +341,4 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_sse_32(lv_32fc_t* resu
 
 
 
-#endif /*INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H*/
diff --git a/volk/include/volk/volk_32fc_dot_prod_aligned16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h
index 1a834dc25..9a7b65ab4 100644
--- a/volk/include/volk/volk_32fc_dot_prod_aligned16.h
+++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H
+#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H
 
 #include <volk/volk_complex.h>
 #include <stdio.h>
@@ -9,7 +9,7 @@
 #if LV_HAVE_GENERIC 
 
 
-static inline void volk_32fc_dot_prod_aligned16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   float * res = (float*) result;
   float * in = (float*) input;
@@ -59,7 +59,7 @@ static inline void volk_32fc_dot_prod_aligned16_generic(lv_32fc_t* result, const
 #if LV_HAVE_SSE && LV_HAVE_64
 
 
-static inline void volk_32fc_dot_prod_aligned16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
 
   asm 
@@ -194,7 +194,7 @@ static inline void volk_32fc_dot_prod_aligned16_sse_64(lv_32fc_t* result, const
 
 #if LV_HAVE_SSE && LV_HAVE_32
 
-static inline void volk_32fc_dot_prod_aligned16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
   asm volatile 
     (
@@ -320,7 +320,7 @@ static inline void volk_32fc_dot_prod_aligned16_sse_32(lv_32fc_t* result, const
 
 #include <pmmintrin.h>
 
-static inline void volk_32fc_dot_prod_aligned16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
   
 
   lv_32fc_t dotProduct;
@@ -377,8 +377,8 @@ static inline void volk_32fc_dot_prod_aligned16_sse3(lv_32fc_t* result, const lv
 
 #include <smmintrin.h>
 
-static inline void volk_32fc_dot_prod_aligned16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
-  volk_32fc_dot_prod_aligned16_sse3(result, input, taps, num_bytes);
+static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) {
+  volk_32fc_x2_dot_prod_32fc_a16_sse3(result, input, taps, num_bytes);
   // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now
    /* 
     __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
@@ -465,4 +465,4 @@ static inline void volk_32fc_dot_prod_aligned16_sse4_1(lv_32fc_t* result, const
 
 #endif /*LV_HAVE_SSE4_1*/
 
-#endif /*INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H*/
diff --git a/volk/include/volk/volk_32fc_multiply_aligned16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h
index 6a1649fdb..b4214f5d2 100644
--- a/volk/include/volk/volk_32fc_multiply_aligned16.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h
@@ -1,9 +1,10 @@
-#ifndef INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H
-#define INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a16_H
+#define INCLUDED_volk_32fc_x2_multiply_32fc_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
 #include <volk/volk_complex.h>
+#include <float.h>
 
 #if LV_HAVE_SSE3
 #include <pmmintrin.h>
@@ -14,7 +15,7 @@
     \param bVector One of the vectors to be multiplied
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_multiply_aligned16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
   unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
@@ -60,7 +61,7 @@ static inline void volk_32fc_multiply_aligned16_sse3(lv_32fc_t* cVector, const l
     \param bVector One of the vectors to be multiplied
     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
   */
-static inline void volk_32fc_multiply_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
     lv_32fc_t* cPtr = cVector;
     const lv_32fc_t* aPtr = aVector;
     const lv_32fc_t* bPtr=  bVector;
@@ -72,7 +73,22 @@ static inline void volk_32fc_multiply_aligned16_generic(lv_32fc_t* cVector, cons
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+  /*!
+    \brief Multiplies the two input complex vectors and stores their results in the third vector (ORC variant: a thin wrapper that forwards all arguments unchanged to the extern-declared volk_32fc_x2_multiply_32fc_a16_orc_impl)
+    \param cVector The vector where the results will be stored
+    \param aVector One of the vectors to be multiplied
+    \param bVector One of the vectors to be multiplied
+    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+  */
+extern void volk_32fc_x2_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
+static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+    volk_32fc_x2_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+
 
 
 
-#endif /* INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H */
+#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a16_H */
diff --git a/volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h
index 0fcc86f1e..6a863b16d 100644
--- a/volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h
+++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-#define INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H
+#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H
 
 #include<inttypes.h>
 #include<stdio.h>
@@ -10,7 +10,7 @@
 #include<xmmintrin.h>
 #include<pmmintrin.h>
 
-static inline void volk_32fc_square_dist_scalar_mult_aligned16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
+static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
   
 
   __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
@@ -106,7 +106,7 @@ static inline void volk_32fc_square_dist_scalar_mult_aligned16_sse3(float* targe
 #endif /*LV_HAVE_SSE3*/
 
 #if LV_HAVE_GENERIC
-static inline void volk_32fc_square_dist_scalar_mult_aligned16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
+static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) {
   lv_32fc_t diff;
   float sq_dist;
   int i = 0; 
@@ -123,4 +123,4 @@ static inline void volk_32fc_square_dist_scalar_mult_aligned16_generic(float* ta
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H*/
diff --git a/volk/include/volk/volk_32fc_square_dist_aligned16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h
index 6458ea4dd..406097fc8 100644
--- a/volk/include/volk/volk_32fc_square_dist_aligned16.h
+++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H
-#define INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H
+#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a16_H
+#define INCLUDED_volk_32fc_x2_square_dist_32f_a16_H
 
 #include<inttypes.h>
 #include<stdio.h>
@@ -9,7 +9,7 @@
 #include<xmmintrin.h>
 #include<pmmintrin.h>
 
-static inline void volk_32fc_square_dist_aligned16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
+static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
   
 
   __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
@@ -92,7 +92,7 @@ static inline void volk_32fc_square_dist_aligned16_sse3(float* target, lv_32fc_t
 #endif /*LV_HAVE_SSE3*/
 
 #if LV_HAVE_GENERIC
-static inline void volk_32fc_square_dist_aligned16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
+static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) {
   lv_32fc_t diff;
   float sq_dist;
   int i = 0; 
@@ -109,4 +109,4 @@ static inline void volk_32fc_square_dist_aligned16_generic(float* target, lv_32f
 #endif /*LV_HAVE_GENERIC*/
 
 
-#endif /*INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H*/
+#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a16_H*/
diff --git a/volk/include/volk/volk_32s_convert_32f_aligned16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h
index a407e68bd..0fcadd9cb 100644
--- a/volk/include/volk/volk_32s_convert_32f_aligned16.h
+++ b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H
-#define INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_32i_s32f_convert_32f_a16_H
+#define INCLUDED_volk_32i_s32f_convert_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32s_convert_32f_aligned16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -55,7 +55,7 @@ static inline void volk_32s_convert_32f_aligned16_sse2(float* outputVector, cons
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_32s_convert_32f_aligned16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int32_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -70,4 +70,4 @@ static inline void volk_32s_convert_32f_aligned16_generic(float* outputVector, c
 
 
 
-#endif /* INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_32i_s32f_convert_32f_a16_H */
diff --git a/volk/include/volk/volk_32s_convert_32f_unaligned16.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h
index ad7d4eb17..1dd6422f8 100644
--- a/volk/include/volk/volk_32s_convert_32f_unaligned16.h
+++ b/volk/include/volk/volk_32i_s32f_convert_32f_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H
-#define INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H
+#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
+#define INCLUDED_volk_32i_s32f_convert_32f_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -15,7 +15,7 @@
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_32s_convert_32f_unaligned16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
     
@@ -57,7 +57,7 @@ static inline void volk_32s_convert_32f_unaligned16_sse2(float* outputVector, co
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_32s_convert_32f_unaligned16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_32i_s32f_convert_32f_u_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int32_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -72,4 +72,4 @@ static inline void volk_32s_convert_32f_unaligned16_generic(float* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H */
+#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
diff --git a/volk/include/volk/volk_32s_and_aligned16.h b/volk/include/volk/volk_32i_x2_and_32i_a16.h
index e9f1e3a43..3baa1d856 100644
--- a/volk/include/volk/volk_32s_and_aligned16.h
+++ b/volk/include/volk/volk_32i_x2_and_32i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32s_AND_ALIGNED16_H
-#define INCLUDED_VOLK_32s_AND_ALIGNED16_H
+#ifndef INCLUDED_volk_32i_x2_and_32i_a16_H
+#define INCLUDED_volk_32i_x2_and_32i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors
   \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
 */
-static inline void volk_32s_and_aligned16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32s_and_aligned16_sse(int32_t* cVector, const int32_t* a
   \param bVector One of the vectors
   \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
 */
-static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     int32_t* cPtr = cVector;
     const int32_t* aPtr = aVector;
     const int32_t* bPtr=  bVector;
@@ -63,7 +63,19 @@ static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Ands the two input vectors and stores their results in the third vector (ORC variant: a thin wrapper that forwards all arguments unchanged to the extern-declared volk_32i_x2_and_32i_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors
+  \param bVector One of the vectors
+  \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
+*/
+extern void volk_32i_x2_and_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
+static inline void volk_32i_x2_and_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+    volk_32i_x2_and_32i_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_32s_AND_ALIGNED16_H */
+#endif /* INCLUDED_volk_32i_x2_and_32i_a16_H */
diff --git a/volk/include/volk/volk_32s_or_aligned16.h b/volk/include/volk/volk_32i_x2_or_32i_a16.h
index f4c427c4d..0be22f00a 100644
--- a/volk/include/volk/volk_32s_or_aligned16.h
+++ b/volk/include/volk/volk_32i_x2_or_32i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32s_OR_ALIGNED16_H
-#define INCLUDED_VOLK_32s_OR_ALIGNED16_H
+#ifndef INCLUDED_volk_32i_x2_or_32i_a16_H
+#define INCLUDED_volk_32i_x2_or_32i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector One of the vectors to be ored
   \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
 */
-static inline void volk_32s_or_aligned16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int quarterPoints = num_points / 4;
 
@@ -51,7 +51,7 @@ static inline void volk_32s_or_aligned16_sse(int32_t* cVector, const int32_t* aV
   \param bVector One of the vectors to be ored
   \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
 */
-static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
     int32_t* cPtr = cVector;
     const int32_t* aPtr = aVector;
     const int32_t* bPtr=  bVector;
@@ -63,7 +63,19 @@ static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+/*!
+  \brief Ors the two input vectors and stores their results in the third vector (ORC variant: a thin wrapper that forwards all arguments unchanged to the extern-declared volk_32i_x2_or_32i_a16_orc_impl)
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be ored
+  \param bVector One of the vectors to be ored
+  \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
+*/
+extern void volk_32i_x2_or_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
+static inline void volk_32i_x2_or_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+    volk_32i_x2_or_32i_a16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
 
 
-
-#endif /* INCLUDED_VOLK_32s_OR_ALIGNED16_H */
+#endif /* INCLUDED_volk_32i_x2_or_32i_a16_H */
diff --git a/volk/include/volk/volk_32u_byteswap_aligned16.h b/volk/include/volk/volk_32u_byteswap_a16.h
index 09173a9d5..7556ec7b1 100644
--- a/volk/include/volk/volk_32u_byteswap_aligned16.h
+++ b/volk/include/volk/volk_32u_byteswap_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H
-#define INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H
+#ifndef INCLUDED_volk_32u_byteswap_a16_H
+#define INCLUDED_volk_32u_byteswap_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_32u_byteswap_aligned16_sse2(uint32_t* intsToSwap, unsigned int num_points){
+static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int num_points){
   unsigned int number = 0;
 
   uint32_t* inputPtr = intsToSwap;
@@ -57,7 +57,7 @@ static inline void volk_32u_byteswap_aligned16_sse2(uint32_t* intsToSwap, unsign
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_32u_byteswap_aligned16_generic(uint32_t* intsToSwap, unsigned int num_points){
+static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned int num_points){
   uint32_t* inputPtr = intsToSwap;
 
   unsigned int point;
@@ -74,4 +74,4 @@ static inline void volk_32u_byteswap_aligned16_generic(uint32_t* intsToSwap, uns
 
 
 
-#endif /* INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H */
+#endif /* INCLUDED_volk_32u_byteswap_a16_H */
diff --git a/volk/include/volk/volk_32u_popcnt_aligned16.h b/volk/include/volk/volk_32u_popcnt_a16.h
index 37cfd112c..f6e25e4e8 100644
--- a/volk/include/volk/volk_32u_popcnt_aligned16.h
+++ b/volk/include/volk/volk_32u_popcnt_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H
-#define INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H
+#ifndef INCLUDED_VOLK_32u_POPCNT_A16_H
+#define INCLUDED_VOLK_32u_POPCNT_A16_H
 
 #include <stdio.h>
 #include <inttypes.h>
@@ -7,7 +7,7 @@
 
 #if LV_HAVE_GENERIC
 
-static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32_t value) {
+static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) {
 
   // This is faster than a lookup table
   uint32_t retVal = value;
@@ -27,10 +27,10 @@ static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32
 
 #include <nmmintrin.h>
 
-static inline void volk_32u_popcnt_aligned16_sse4_2(uint32_t* ret, const uint32_t value) {
+static inline void volk_32u_popcnt_a16_sse4_2(uint32_t* ret, const uint32_t value) {
   *ret = _mm_popcnt_u32(value);
 }
 
 #endif /*LV_HAVE_SSE4_2*/
 
-#endif /*INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H*/
+#endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/
diff --git a/volk/include/volk/volk_64f_convert_32f_aligned16.h b/volk/include/volk/volk_64f_convert_32f_a16.h
index 44df66104..7dca065f0 100644
--- a/volk/include/volk/volk_64f_convert_32f_aligned16.h
+++ b/volk/include/volk/volk_64f_convert_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H
-#define INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_64f_convert_32f_a16_H
+#define INCLUDED_volk_64f_convert_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
     \param fVector The double vector values to be converted
     \param num_points The number of points in the two vectors to be converted
   */
-static inline void volk_64f_convert_32f_aligned16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
+static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_aligned16_sse2(float* outputVector, cons
   \param fVector The double vector values to be converted
   \param num_points The number of points in the two vectors to be converted
 */
-static inline void volk_64f_convert_32f_aligned16_generic(float* outputVector, const double* inputVector, unsigned int num_points){
+static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const double* inputVector, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const double* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_aligned16_generic(float* outputVector, c
 
 
 
-#endif /* INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_64f_convert_32f_a16_H */
diff --git a/volk/include/volk/volk_64f_convert_32f_unaligned16.h b/volk/include/volk/volk_64f_convert_32f_u.h
index 08cfb6127..6338c1433 100644
--- a/volk/include/volk/volk_64f_convert_32f_unaligned16.h
+++ b/volk/include/volk/volk_64f_convert_32f_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H
-#define INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H
+#ifndef INCLUDED_volk_64f_convert_32f_u_H
+#define INCLUDED_volk_64f_convert_32f_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
     \param fVector The double vector values to be converted
     \param num_points The number of points in the two vectors to be converted
   */
-static inline void volk_64f_convert_32f_unaligned16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
+static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
   unsigned int number = 0;
 
   const unsigned int quarterPoints = num_points / 4;
@@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_unaligned16_sse2(float* outputVector, co
   \param fVector The double vector values to be converted
   \param num_points The number of points in the two vectors to be converted
 */
-static inline void volk_64f_convert_32f_unaligned16_generic(float* outputVector, const double* inputVector, unsigned int num_points){
+static inline void volk_64f_convert_32f_u_generic(float* outputVector, const double* inputVector, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const double* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_unaligned16_generic(float* outputVector,
 
 
 
-#endif /* INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H */
+#endif /* INCLUDED_volk_64f_convert_32f_u_H */
diff --git a/volk/include/volk/volk_64f_max_aligned16.h b/volk/include/volk/volk_64f_x2_max_64f_a16.h
index ce4907a8c..4b0c1f5f1 100644
--- a/volk/include/volk/volk_64f_max_aligned16.h
+++ b/volk/include/volk/volk_64f_x2_max_64f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_64f_MAX_ALIGNED16_H
-#define INCLUDED_VOLK_64f_MAX_ALIGNED16_H
+#ifndef INCLUDED_volk_64f_x2_max_64f_a16_H
+#define INCLUDED_volk_64f_x2_max_64f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_max_aligned16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
@@ -53,7 +53,7 @@ static inline void volk_64f_max_aligned16_sse2(double* cVector, const double* aV
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_max_aligned16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     double* cPtr = cVector;
     const double* aPtr = aVector;
     const double* bPtr=  bVector;
@@ -68,4 +68,4 @@ static inline void volk_64f_max_aligned16_generic(double* cVector, const double*
 #endif /* LV_HAVE_GENERIC */
 
 
-#endif /* INCLUDED_VOLK_64f_MAX_ALIGNED16_H */
+#endif /* INCLUDED_volk_64f_x2_max_64f_a16_H */
diff --git a/volk/include/volk/volk_64f_min_aligned16.h b/volk/include/volk/volk_64f_x2_min_64f_a16.h
index acf4d6b2a..aa961e384 100644
--- a/volk/include/volk/volk_64f_min_aligned16.h
+++ b/volk/include/volk/volk_64f_x2_min_64f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_64f_MIN_ALIGNED16_H
-#define INCLUDED_VOLK_64f_MIN_ALIGNED16_H
+#ifndef INCLUDED_volk_64f_x2_min_64f_a16_H
+#define INCLUDED_volk_64f_x2_min_64f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_min_aligned16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
@@ -53,7 +53,7 @@ static inline void volk_64f_min_aligned16_sse2(double* cVector, const double* aV
   \param bVector The vector to be checked
   \param num_points The number of values in aVector and bVector to be checked and stored into cVector
 */
-static inline void volk_64f_min_aligned16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
+static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){
     double* cPtr = cVector;
     const double* aPtr = aVector;
     const double* bPtr=  bVector;
@@ -68,4 +68,4 @@ static inline void volk_64f_min_aligned16_generic(double* cVector, const double*
 #endif /* LV_HAVE_GENERIC */
 
 
-#endif /* INCLUDED_VOLK_64f_MIN_ALIGNED16_H */
+#endif /* INCLUDED_volk_64f_x2_min_64f_a16_H */
diff --git a/volk/include/volk/volk_64u_byteswap_aligned16.h b/volk/include/volk/volk_64u_byteswap_a16.h
index d5e1b6f30..0eefe0138 100644
--- a/volk/include/volk/volk_64u_byteswap_aligned16.h
+++ b/volk/include/volk/volk_64u_byteswap_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H
-#define INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H
+#ifndef INCLUDED_volk_64u_byteswap_a16_H
+#define INCLUDED_volk_64u_byteswap_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_64u_byteswap_aligned16_sse2(uint64_t* intsToSwap, unsigned int num_points){
+static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int num_points){
     uint32_t* inputPtr = (uint32_t*)intsToSwap;
     __m128i input, byte1, byte2, byte3, byte4, output;
     __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
@@ -65,7 +65,7 @@ static inline void volk_64u_byteswap_aligned16_sse2(uint64_t* intsToSwap, unsign
   \param intsToSwap The vector of data to byte swap
   \param numDataPoints The number of data points
 */
-static inline void volk_64u_byteswap_aligned16_generic(uint64_t* intsToSwap, unsigned int num_points){
+static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned int num_points){
   uint32_t* inputPtr = (uint32_t*)intsToSwap;
   unsigned int point;
   for(point = 0; point < num_points; point++){
@@ -85,4 +85,4 @@ static inline void volk_64u_byteswap_aligned16_generic(uint64_t* intsToSwap, uns
 
 
 
-#endif /* INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H */
+#endif /* INCLUDED_volk_64u_byteswap_a16_H */
diff --git a/volk/include/volk/volk_64u_popcnt_aligned16.h b/volk/include/volk/volk_64u_popcnt_a16.h
index 4d62f9375..59511dc29 100644
--- a/volk/include/volk/volk_64u_popcnt_aligned16.h
+++ b/volk/include/volk/volk_64u_popcnt_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H
-#define INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H
+#ifndef INCLUDED_volk_64u_popcnt_a16_H
+#define INCLUDED_volk_64u_popcnt_a16_H
 
 #include <stdio.h>
 #include <inttypes.h>
@@ -8,7 +8,7 @@
 #if LV_HAVE_GENERIC
 
 
-static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64_t value) {
+static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) {
 
   const uint32_t* valueVector = (const uint32_t*)&value;
   
@@ -40,11 +40,11 @@ static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64
 
 #include <nmmintrin.h>
 
-static inline void volk_64u_popcnt_aligned16_sse4_2(uint64_t* ret, const uint64_t value) {
+static inline void volk_64u_popcnt_a16_sse4_2(uint64_t* ret, const uint64_t value) {
   *ret = _mm_popcnt_u64(value);
 
 }
 
 #endif /*LV_HAVE_SSE4_2*/
 
-#endif /*INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H*/
+#endif /*INCLUDED_volk_64u_popcnt_a16_H*/
diff --git a/volk/include/volk/volk_8s_convert_16s_aligned16.h b/volk/include/volk/volk_8i_convert_16i_a16.h
index 0efe3c6a1..3d7045753 100644
--- a/volk/include/volk/volk_8s_convert_16s_aligned16.h
+++ b/volk/include/volk/volk_8i_convert_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED16_H
-#define INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_8i_convert_16i_a16_H
+#define INCLUDED_volk_8i_convert_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
     \param outputVector The 16 bit output data buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8s_convert_16s_aligned16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
 
@@ -54,7 +54,7 @@ static inline void volk_8s_convert_16s_aligned16_sse4_1(int16_t* outputVector, c
     \param outputVector The 16 bit output data buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
   int16_t* outputVectorPtr = outputVector;
   const int8_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -65,6 +65,18 @@ static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector,
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+  /*!
+    \brief Converts the input 8 bit integer data into 16 bit integer data
+    \param inputVector The 8 bit input data buffer
+    \param outputVector The 16 bit output data buffer
+    \param num_points The number of data values to be converted
+  */
+extern void volk_8i_convert_16i_a16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); /* defined outside this header */
+static inline void volk_8i_convert_16i_a16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+    volk_8i_convert_16i_a16_orc_impl(outputVector, inputVector, num_points); /* arguments are forwarded unchanged */
+}
+#endif /* LV_HAVE_ORC */
 
 
 
diff --git a/volk/include/volk/volk_8s_convert_16s_unaligned16.h b/volk/include/volk/volk_8i_convert_16i_u.h
index 05b916cea..bcff13406 100644
--- a/volk/include/volk/volk_8s_convert_16s_unaligned16.h
+++ b/volk/include/volk/volk_8i_convert_16i_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED16_H
-#define INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED16_H
+#ifndef INCLUDED_volk_8i_convert_16i_u_H
+#define INCLUDED_volk_8i_convert_16i_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param num_points The number of data values to be converted
     \note Input and output buffers do NOT need to be properly aligned
   */
-static inline void volk_8s_convert_16s_unaligned16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
 
@@ -56,7 +56,7 @@ static inline void volk_8s_convert_16s_unaligned16_sse4_1(int16_t* outputVector,
     \param num_points The number of data values to be converted
     \note Input and output buffers do NOT need to be properly aligned
   */
-static inline void volk_8s_convert_16s_unaligned16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+static inline void volk_8i_convert_16i_u_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
   int16_t* outputVectorPtr = outputVector;
   const int8_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
diff --git a/volk/include/volk/volk_8s_convert_32f_aligned16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h
index 54b66ef8f..99a24ec10 100644
--- a/volk/include/volk/volk_8s_convert_32f_aligned16.h
+++ b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED16_H
-#define INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_8i_s32f_convert_32f_a16_H
+#define INCLUDED_volk_8i_s32f_convert_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8s_convert_32f_aligned16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
     
@@ -74,7 +74,7 @@ static inline void volk_8s_convert_32f_aligned16_sse4_1(float* outputVector, con
     \param scalar The value divided against each point in the output buffer
     \param num_points The number of data values to be converted
   */
-static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int8_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
@@ -86,6 +86,20 @@ static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, co
 }
 #endif /* LV_HAVE_GENERIC */
 
+#if LV_HAVE_ORC
+  /*!
+    \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+    \param inputVector The 8 bit input data buffer
+    \param outputVector The floating point output data buffer
+    \param scalar The value divided against each point in the output buffer
+    \param num_points The number of data values to be converted
+  */
+extern void volk_8i_s32f_convert_32f_a16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); /* defined outside this header */
+static inline void volk_8i_s32f_convert_32f_a16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+    float invscalar = 1.0 / scalar; /* reciprocal is computed once here, before the call */
+    volk_8i_s32f_convert_32f_a16_orc_impl(outputVector, inputVector, invscalar, num_points); /* NOTE(review): impl receives 1/scalar in its scalar slot; presumably it multiplies -- confirm against the ORC source */
+}
+#endif /* LV_HAVE_ORC */
 
 
 
diff --git a/volk/include/volk/volk_8s_convert_32f_unaligned16.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h
index 8019aac9a..1e30957e8 100644
--- a/volk/include/volk/volk_8s_convert_32f_unaligned16.h
+++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED16_H
-#define INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED16_H
+#ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
+#define INCLUDED_volk_8i_s32f_convert_32f_u_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -15,7 +15,7 @@
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_8s_convert_32f_unaligned16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
     unsigned int number = 0;
     const unsigned int sixteenthPoints = num_points / 16;
     
@@ -76,7 +76,7 @@ static inline void volk_8s_convert_32f_unaligned16_sse4_1(float* outputVector, c
     \param num_points The number of data values to be converted
     \note Output buffer does NOT need to be properly aligned
   */
-static inline void volk_8s_convert_32f_unaligned16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+static inline void volk_8i_s32f_convert_32f_u_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
   float* outputVectorPtr = outputVector;
   const int8_t* inputVectorPtr = inputVector;
   unsigned int number = 0;
diff --git a/volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h
index 38eaa49ea..91c9b2c58 100644
--- a/volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h
+++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H
+#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H
+#define INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_16s_aligned16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -59,7 +59,7 @@ static inline void volk_8sc_deinterleave_16s_aligned16_sse4_1(int16_t* iBuffer,
   \param qBuffer The Q buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
   int16_t* qBufferPtr = qBuffer;
@@ -74,4 +74,4 @@ static inline void volk_8sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer,
 
 
 
-#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H */
+#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H */
diff --git a/volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h
index d0cb49494..bf3dc20dd 100644
--- a/volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h
+++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H
-#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H
+#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a16_H
+#define INCLUDED_volk_8ic_deinterleave_real_16i_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_16s_aligned16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -49,7 +49,7 @@ static inline void volk_8sc_deinterleave_real_16s_aligned16_sse4_1(int16_t* iBuf
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   int16_t* iBufferPtr = iBuffer;
@@ -63,4 +63,4 @@ static inline void volk_8sc_deinterleave_real_16s_aligned16_generic(int16_t* iBu
 
 
 
-#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */
+#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a16_H */
diff --git a/volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h
index d84d64568..13de79423 100644
--- a/volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h
+++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h
@@ -12,7 +12,7 @@
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
@@ -50,7 +50,7 @@ static inline void volk_8sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer
   \param iBuffer The I buffer output data
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
+static inline void volk_8ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (int8_t*)complexVector;
   int8_t* iBufferPtr = iBuffer;
diff --git a/volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h
index d0c118965..22c3ebb23 100644
--- a/volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H
+#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
+#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_32f_aligned16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
 
@@ -84,7 +84,7 @@ static inline void volk_8sc_deinterleave_32f_aligned16_sse4_1(float* iBuffer, fl
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
 
@@ -145,7 +145,7 @@ static inline void volk_8sc_deinterleave_32f_aligned16_sse(float* iBuffer, float
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   float* iBufferPtr = iBuffer;
   float* qBufferPtr = qBuffer;
@@ -161,4 +161,4 @@ static inline void volk_8sc_deinterleave_32f_aligned16_generic(float* iBuffer, f
 
 
 
-#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H */
+#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */
diff --git a/volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h
index c849448ea..5f1430394 100644
--- a/volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h
+++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H
-#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H
+#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
+#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -13,7 +13,7 @@
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -70,7 +70,7 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffe
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   float* iBufferPtr = iBuffer;
 
   unsigned int number = 0;
@@ -115,7 +115,7 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_sse(float* iBuffer,
   \param scalar The scaling value being multiplied against each data point
   \param num_points The number of complex data values to be deinterleaved
 */
-static inline void volk_8sc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
   float* iBufferPtr = iBuffer;
@@ -130,4 +130,4 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_generic(float* iBuff
 
 
 
-#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H */
+#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */
diff --git a/volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h
index 470a67539..d9cacbf46 100644
--- a/volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h
+++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H
-#define INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H
+#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H
+#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8sc_multiply_conjugate_16sc_aligned16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -76,7 +76,7 @@ static inline void volk_8sc_multiply_conjugate_16sc_aligned16_sse4_1(lv_16sc_t*
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8sc_multiply_conjugate_16sc_aligned16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
   unsigned int number = 0;
   int16_t* c16Ptr = (int16_t*)cVector;
   int8_t* a8Ptr = (int8_t*)aVector;
@@ -99,4 +99,4 @@ static inline void volk_8sc_multiply_conjugate_16sc_aligned16_generic(lv_16sc_t*
 
 
 
-#endif /* INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H */
+#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */
diff --git a/volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h
index 52b444cf7..6ec923a4f 100644
--- a/volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h
+++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h
@@ -1,5 +1,5 @@
-#ifndef INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H
-#define INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H
+#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H
+#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H
 
 #include <inttypes.h>
 #include <stdio.h>
@@ -14,7 +14,7 @@
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8sc_multiply_conjugate_32fc_aligned16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   const unsigned int quarterPoints = num_points / 4;
 
@@ -95,7 +95,7 @@ static inline void volk_8sc_multiply_conjugate_32fc_aligned16_sse4_1(lv_32fc_t*
   \param bVector The complex vector which will be converted to complex conjugate and multiplied
   \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
 */
-static inline void volk_8sc_multiply_conjugate_32fc_aligned16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
+static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
   unsigned int number = 0;
   float* cPtr = (float*)cVector;
   const float invScalar = 1.0 / scalar;
@@ -119,4 +119,4 @@ static inline void volk_8sc_multiply_conjugate_32fc_aligned16_generic(lv_32fc_t*
 
 
 
-#endif /* INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H */
+#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H */
diff --git a/volk/include/volk/volk_register.py b/volk/include/volk/volk_register.py
index 9fded9a3e..bc8f959af 100755
--- a/volk/include/volk/volk_register.py
+++ b/volk/include/volk/volk_register.py
@@ -55,7 +55,7 @@ functions = [];
 
 
 for line in mfile:
-    subline = re.search(".*(aligned).*", line);
+    subline = re.search(".*_(a16|u)\.h.*", line);
     if subline:
         subsubline = re.search("(?<=volk_).*", subline.group(0));
         if subsubline:
@@ -70,11 +70,10 @@ datatypes = set(datatypes);
 for line in mfile:
     for dt in datatypes:
         if dt in line:
-            subline = re.search("(volk_" + dt +"_.*(aligned).*\.h)", line);
+            subline = re.search("(volk_" + dt +"_.*(a16|u).*\.h)", line);
             if subline:
                 
                 subsubline = re.search(".+(?=\.h)", subline.group(0));
-                
                 functions.append(subsubline.group(0));
 
 archs = [];
diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 446ff574f..af7c7f335 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -24,18 +24,19 @@ include $(top_srcdir)/Makefile.common
 # of a hack. Figure out the right way to do this to find built
 # volk_config.h and volk_tables.h
 
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
 	-I$(top_builddir)/include \
 	$(LV_CXXFLAGS) $(WITH_INCLUDES)
 
 
-# We build 2 libraries and 1 executable here.  One library contains
-# everything except the libcppunit QA code, and one contains only the
-# libcppunit-based QA code.  The C++ QA code is especially recommended
+# We build 1 library and 1 executable here.  The library contains
+# everything except the QA code. The C++ QA code is especially recommended
 # when you have general purpose C or C++ code that may not get
 # thoroughly exercised by building and running a GR block.  The
 # executable runs the QA code at "make check" time.
 #
+#
+#
 # N.B., If there's a SWIG generated shared library and associated
 # python code, it will be contained in ../python, not here.  (That
 # code is conditionally built depending on the state of the
@@ -44,13 +45,14 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 
 
 # list of programs run by "make check" and "make distcheck"
-#TESTS = test_all
-
+#TESTS = testqa
+# ORC code is built in the orc directory, and only when ORC is enabled.
+# It is linked into libvolk as an additional library (see libvolk_la_LIBADD).
+# There might be a better way to do this.
 
 lib_LTLIBRARIES = \
 	libvolk.la \
-	libvolk_runtime.la \
-	libvolk_qa.la
+	libvolk_runtime.la
 
 EXTRA_DIST = \
 	volk_mktables.c		\
@@ -72,221 +74,63 @@ libvolk_la_SOURCES = 		\
 	volk.c 			\
 	volk_environment_init.c
 
+volk_orc_LDFLAGS = \
+	$(ORC_LDFLAGS) \
+	-lorc-0.4
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+volk_orc_LIBADD = \
+	../orc/libvolk_orc.la
 
+if LV_HAVE_ORC
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_la_LIBADD = $(volk_orc_LIBADD)
+else
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
 libvolk_la_LIBADD =
-
+endif
 
 
 # ----------------------------------------------------------------
 #        The QA library.  Note libvolk.la in LIBADD
 # ----------------------------------------------------------------
-libvolk_qa_la_SOURCES = \
-	qa_volk.cc \
-	qa_16s_quad_max_star_aligned16.cc \
-	qa_32fc_dot_prod_aligned16.cc \
-	qa_32fc_square_dist_aligned16.cc \
-	qa_32fc_square_dist_scalar_mult_aligned16.cc \
-	qa_32f_sum_of_poly_aligned16.cc \
-	qa_32fc_index_max_aligned16.cc \
-	qa_32f_index_max_aligned16.cc \
-	qa_32fc_conjugate_dot_prod_aligned16.cc \
-	qa_32fc_conjugate_dot_prod_unaligned.cc \
-	qa_16s_permute_and_scalar_add_aligned16.cc \
-	qa_16s_branch_4_state_8_aligned16.cc \
-	qa_16s_max_star_horizontal_aligned16.cc \
-	qa_16s_max_star_aligned16.cc \
-	qa_16s_add_quad_aligned16.cc \
-	qa_32f_add_aligned16.cc \
-	qa_32f_subtract_aligned16.cc \
-	qa_32f_max_aligned16.cc \
-	qa_32f_min_aligned16.cc \
-	qa_64f_max_aligned16.cc \
-	qa_64f_min_aligned16.cc \
-	qa_32s_and_aligned16.cc \
-	qa_32s_or_aligned16.cc \
-	qa_32f_dot_prod_aligned16.cc \
-	qa_32f_dot_prod_unaligned16.cc \
-	qa_32f_fm_detect_aligned16.cc \
-	qa_32fc_32f_multiply_aligned16.cc \
-	qa_32fc_multiply_aligned16.cc \
-	qa_32f_divide_aligned16.cc \
-	qa_32f_multiply_aligned16.cc \
-	qa_32f_sqrt_aligned16.cc \
-	qa_8sc_multiply_conjugate_16sc_aligned16.cc \
-	qa_8sc_multiply_conjugate_32fc_aligned16.cc \
-	qa_32u_popcnt_aligned16.cc \
-	qa_64u_popcnt_aligned16.cc \
-	qa_64u_byteswap_aligned16.cc \
-	qa_8sc_deinterleave_32f_aligned16.cc \
-	qa_16sc_deinterleave_32f_aligned16.cc \
-	qa_8sc_deinterleave_16s_aligned16.cc \
-	qa_32f_interleave_32fc_aligned16.cc \
-	qa_16u_byteswap_aligned16.cc \
-	qa_16sc_deinterleave_16s_aligned16.cc \
-	qa_32fc_deinterleave_real_32f_aligned16.cc \
-	qa_32fc_magnitude_32f_aligned16.cc \
-	qa_32fc_deinterleave_real_64f_aligned16.cc \
-	qa_32fc_deinterleave_real_16s_aligned16.cc \
-	qa_32fc_magnitude_16s_aligned16.cc \
-	qa_32fc_deinterleave_32f_aligned16.cc \
-	qa_8sc_deinterleave_real_8s_aligned16.cc \
-	qa_32fc_deinterleave_64f_aligned16.cc \
-	qa_32f_interleave_16sc_aligned16.cc \
-	qa_16sc_deinterleave_real_8s_aligned16.cc \
-	qa_16sc_deinterleave_real_32f_aligned16.cc \
-	qa_16sc_magnitude_32f_aligned16.cc \
-	qa_32u_byteswap_aligned16.cc \
-	qa_16sc_deinterleave_real_16s_aligned16.cc \
-	qa_8sc_deinterleave_real_32f_aligned16.cc \
-	qa_16sc_magnitude_16s_aligned16.cc \
-	qa_32f_normalize_aligned16.cc \
-	qa_8sc_deinterleave_real_16s_aligned16.cc \
-	qa_16s_convert_32f_aligned16.cc \
-	qa_16s_convert_32f_unaligned16.cc \
-	qa_16s_convert_8s_aligned16.cc \
-	qa_16s_convert_8s_unaligned16.cc \
-	qa_32f_convert_16s_aligned16.cc \
-	qa_32f_convert_16s_unaligned16.cc \
-	qa_32f_convert_32s_aligned16.cc \
-	qa_32f_convert_32s_unaligned16.cc \
-	qa_32f_convert_64f_aligned16.cc \
-	qa_32f_convert_64f_unaligned16.cc \
-	qa_32f_convert_8s_aligned16.cc \
-	qa_32f_convert_8s_unaligned16.cc \
-	qa_32s_convert_32f_aligned16.cc \
-	qa_32s_convert_32f_unaligned16.cc \
-	qa_64f_convert_32f_aligned16.cc \
-	qa_64f_convert_32f_unaligned16.cc \
-	qa_8s_convert_16s_aligned16.cc \
-	qa_8s_convert_16s_unaligned16.cc \
-	qa_8s_convert_32f_aligned16.cc \
-	qa_8s_convert_32f_unaligned16.cc \
-	qa_32fc_32f_power_32fc_aligned16.cc \
-	qa_32f_power_aligned16.cc \
-	qa_32fc_atan2_32f_aligned16.cc \
-	qa_32fc_power_spectral_density_32f_aligned16.cc \
-	qa_32fc_power_spectrum_32f_aligned16.cc \
-	qa_32f_calc_spectral_noise_floor_aligned16.cc \
-	qa_32f_accumulator_aligned16.cc \
-	qa_32f_stddev_aligned16.cc \
-	qa_32f_stddev_and_mean_aligned16.cc
+#libvolk_qa_la_SOURCES = \
+#	qa_utils.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
+#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost
 
-libvolk_qa_la_LIBADD = \
-	libvolk.la \
-	libvolk_runtime.la \
-	$(CPPUNIT_LIBS)
+#libvolk_qa_la_LIBADD = \
+#	libvolk.la \
+#	libvolk_runtime.la
 
 # ----------------------------------------------------------------
 # headers that don't get installed
 # ----------------------------------------------------------------
 noinst_HEADERS = \
 	volk_init.h \
-	qa_volk.h \
-	gcc_x86_cpuid.h \
-	qa_16s_quad_max_star_aligned16.h \
-	qa_32fc_dot_prod_aligned16.h \
-	qa_32fc_square_dist_aligned16.h \
-	qa_32fc_square_dist_scalar_mult_aligned16.h \
-	qa_32f_sum_of_poly_aligned16.h \
-	qa_32fc_index_max_aligned16.h \
-	qa_32f_index_max_aligned16.h \
-	qa_32fc_conjugate_dot_prod_aligned16.h \
-	qa_32fc_conjugate_dot_prod_unaligned.h \
-	qa_16s_permute_and_scalar_add_aligned16.h \
-	qa_16s_branch_4_state_8_aligned16.h \
-	qa_16s_max_star_horizontal_aligned16.h \
-	qa_16s_max_star_aligned16.h \
-	qa_16s_add_quad_aligned16.h \
-	qa_32f_add_aligned16.h \
-	qa_32f_subtract_aligned16.h \
-	qa_32f_max_aligned16.h \
-	qa_32f_min_aligned16.h \
-	qa_64f_max_aligned16.h \
-	qa_64f_min_aligned16.h \
-	qa_32s_and_aligned16.h \
-	qa_32s_or_aligned16.h \
-	qa_32f_dot_prod_aligned16.h \
-	qa_32f_dot_prod_unaligned16.h \
-	qa_32f_fm_detect_aligned16.h \
-	qa_32fc_32f_multiply_aligned16.h \
-	qa_32fc_multiply_aligned16.h \
-	qa_32f_divide_aligned16.h \
-	qa_32f_multiply_aligned16.h \
-	qa_32f_sqrt_aligned16.h \
-	qa_8sc_multiply_conjugate_16sc_aligned16.h \
-	qa_8sc_multiply_conjugate_32fc_aligned16.h \
-	qa_32u_popcnt_aligned16.h \
-	qa_64u_popcnt_aligned16.h \
-	qa_64u_byteswap_aligned16.h \
-	qa_8sc_deinterleave_32f_aligned16.h \
-	qa_16sc_deinterleave_32f_aligned16.h \
-	qa_8sc_deinterleave_16s_aligned16.h \
-	qa_32f_interleave_32fc_aligned16.h \
-	qa_16u_byteswap_aligned16.h \
-	qa_16sc_deinterleave_16s_aligned16.h \
-	qa_32fc_deinterleave_real_32f_aligned16.h \
-	qa_32fc_magnitude_32f_aligned16.h \
-	qa_32fc_deinterleave_real_64f_aligned16.h \
-	qa_32fc_deinterleave_real_16s_aligned16.h \
-	qa_32fc_magnitude_16s_aligned16.h \
-	qa_32fc_deinterleave_32f_aligned16.h \
-	qa_8sc_deinterleave_real_8s_aligned16.h \
-	qa_32fc_deinterleave_64f_aligned16.h \
-	qa_32f_interleave_16sc_aligned16.h \
-	qa_16sc_deinterleave_real_8s_aligned16.h \
-	qa_16sc_deinterleave_real_32f_aligned16.h \
-	qa_16sc_magnitude_32f_aligned16.h \
-	qa_32u_byteswap_aligned16.h \
-	qa_16sc_deinterleave_real_16s_aligned16.h \
-	qa_8sc_deinterleave_real_32f_aligned16.h \
-	qa_16sc_magnitude_16s_aligned16.h \
-	qa_32f_normalize_aligned16.h \
-	qa_8sc_deinterleave_real_16s_aligned16.h \
-	qa_16s_convert_32f_aligned16.h \
-	qa_16s_convert_32f_unaligned16.h \
-	qa_16s_convert_8s_aligned16.h \
-	qa_16s_convert_8s_unaligned16.h \
-	qa_32f_convert_16s_aligned16.h \
-	qa_32f_convert_16s_unaligned16.h \
-	qa_32f_convert_32s_aligned16.h \
-	qa_32f_convert_32s_unaligned16.h \
-	qa_32f_convert_64f_aligned16.h \
-	qa_32f_convert_64f_unaligned16.h \
-	qa_32f_convert_8s_aligned16.h \
-	qa_32f_convert_8s_unaligned16.h \
-	qa_32s_convert_32f_aligned16.h \
-	qa_32s_convert_32f_unaligned16.h \
-	qa_64f_convert_32f_aligned16.h \
-	qa_64f_convert_32f_unaligned16.h \
-	qa_8s_convert_16s_aligned16.h \
-	qa_8s_convert_16s_unaligned16.h \
-	qa_8s_convert_32f_aligned16.h \
-	qa_8s_convert_32f_unaligned16.h \
-	qa_32fc_32f_power_32fc_aligned16.h \
-	qa_32f_power_aligned16.h \
-	qa_32fc_atan2_32f_aligned16.h \
-	qa_32fc_power_spectral_density_32f_aligned16.h \
-	qa_32fc_power_spectrum_32f_aligned16.h \
-	qa_32f_calc_spectral_noise_floor_aligned16.h \
-	qa_32f_accumulator_aligned16.h \
-	qa_32f_stddev_aligned16.h \
-	qa_32f_stddev_and_mean_aligned16.h
-
+	qa_utils.h \
+	assembly.h
 
 # ----------------------------------------------------------------
 # Our test program
 # ----------------------------------------------------------------
 noinst_PROGRAMS = \
-	test_all
-
-test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk.la libvolk_runtime.la libvolk_qa.la
+	testqa
 
+testqa_SOURCES = testqa.cc qa_utils.cc
+testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS)
+# Libraries go in LDADD rather than LDFLAGS: Automake reserves LDFLAGS for
+# linker options and expands it ahead of the objects on the link line.
+testqa_LDADD = \
+	libvolk.la \
+	libvolk_runtime.la
+if LV_HAVE_ORC
+# The ORC code is a separate libtool library from the orc directory.
+testqa_LDADD += ../orc/libvolk_orc.la
+endif
+# Boost unit-test framework library used by the QA executable.
+testqa_LDADD += $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
 
 distclean-local: 
 	rm -f volk.c
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc
deleted file mode 100644
index 6215f4a64..000000000
--- a/volk/lib/qa_16s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <volk/volk_16s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE
-
-void qa_16s_convert_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h
deleted file mode 100644
index ef813d96f..000000000
--- a/volk/lib/qa_16s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc
deleted file mode 100644
index 46c2e48ac..000000000
--- a/volk/lib/qa_16s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <volk/volk_16s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE
-
-void qa_16s_convert_32f_unaligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_32f_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h
deleted file mode 100644
index aeb04f770..000000000
--- a/volk/lib/qa_16s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc
deleted file mode 100644
index 8225aa0cf..000000000
--- a/volk/lib/qa_16s_convert_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <volk/volk_16s_convert_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_convert_8s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h
deleted file mode 100644
index 2e409d0cc..000000000
--- a/volk/lib/qa_16s_convert_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc
deleted file mode 100644
index e6ce5030e..000000000
--- a/volk/lib/qa_16s_convert_8s_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <volk/volk_16s_convert_8s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_convert_8s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_8s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_8s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h
deleted file mode 100644
index 4b2fe9e42..000000000
--- a/volk/lib/qa_16s_convert_8s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc
deleted file mode 100644
index c6f828ba6..000000000
--- a/volk/lib/qa_16s_max_star_aligned16.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <volk/volk_16s_max_star_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_max_star_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-
-
-void qa_16s_max_star_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 6400;
-  const int ITERS = 100000;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short output0[1] __attribute__ ((aligned (16)));
-
-  short output1[1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-
-    short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    
-    input0[i] = plus0 - minus0;
-    
-  }
-  printf("16s_max_star_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < 1; ++i) {
-    
-    CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h
deleted file mode 100644
index 119f87c4d..000000000
--- a/volk/lib/qa_16s_max_star_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
-#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_max_star_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
deleted file mode 100644
index 0a58570e2..000000000
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <volk/volk_16s_max_star_horizontal_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_max_star_horizontal_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_16s_max_star_horizontal_aligned16::t1() {
-
-  
-  volk_runtime_init();
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 32;
-  const int ITERS = 1;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short output0[vlen>>1] __attribute__ ((aligned (16)));
-
-  short output1[vlen>>1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2)));
-    
-    short minus0 = ((short) (rand() - (RAND_MAX/2)));
-    
-    input0[i] = plus0 - minus0;
-    
-  }
-  printf("16s_max_star_horizontal_aligned\n");
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic");
-    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic");
-    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen);
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
-    /*    volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3");
-    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");
-    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-  
-  for(int i = 0; i < (vlen >> 1); ++i) {
-    //    printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-    
-  }
-  for(int i = 0; i < (vlen >> 1); ++i) {
-      
-      CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-    }
-	}
-   
-  
-#endif
-	
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h
deleted file mode 100644
index 9f9757253..000000000
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
-#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
deleted file mode 100644
index c775e8596..000000000
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_16s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0));
-  }
-  printf("16sc_deinterleave_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse2[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse21[i]);
-
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_ssse3[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_ssse31[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
deleted file mode 100644
index 995ab5b34..000000000
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index b25094e89..000000000
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <volk/volk_16sc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16sc_deinterleave_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-  float output_sse21[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_sse21[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
deleted file mode 100644
index fea3b6c2d..000000000
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index c67064ea6..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_real_16s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0));
-  }
-  printf("16sc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    // printf("%d = generic... %d, sse2... %d, ssse3... %d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index ebb70b97a..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index f86f03b88..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* SSE */
-
-#else
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
-  }
-  printf("16sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* SSE4_1 */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index e83426473..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
deleted file mode 100644
index dd446567e..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_real_8s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
-  }
-  printf("16sc_deinterleave_real_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
deleted file mode 100644
index 04e5511e5..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
deleted file mode 100644
index 9799ef43b..000000000
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <volk/volk_16sc_magnitude_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_16sc_magnitude_16s_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_magnitude_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_magnitude_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h
deleted file mode 100644
index 4664b70f4..000000000
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
deleted file mode 100644
index 1ebe644c5..000000000
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <volk/volk_16sc_magnitude_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_16sc_magnitude_32f_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* inputLoad = (int16_t*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2));
-  }
-  printf("16sc_magnitude_32f_aligned\n");
-
-  float scale = 32768.0;
-  for(int i = 0; i < vlen; ++i) {   
-    float re = (float)(input0[i].real())/scale;
-    float im = (float)(input0[i].imag())/scale;
-    output_known[i] = sqrt(re*re + im*im);
-  }
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 100; ++i) {
-    printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag());
-    printf("generic... %f == %f\n", output_generic[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#else
-
-void qa_16sc_magnitude_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* inputLoad = (int16_t*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("16sc_magnitude_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h
deleted file mode 100644
index 0c25673ea..000000000
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
deleted file mode 100644
index ea117a820..000000000
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <volk/volk_16u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint16_t output0[vlen] __attribute__ ((aligned (16)));
-  uint16_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint16_t));
-
-  printf("16u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h
deleted file mode 100644
index e11b23e3f..000000000
--- a/volk/lib/qa_16u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc
deleted file mode 100644
index 0defef283..000000000
--- a/volk/lib/qa_32f_accumulator_aligned16.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <volk/volk_32f_accumulator_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_accumulator_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_accumulator_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float accumulator_generic;
-  float accumulator_sse;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_accumulator_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  //printf("%d...%d\n", output0[i], output01[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4);
-}
-
-#endif
diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h
deleted file mode 100644
index 0004d3ff0..000000000
--- a/volk/lib/qa_32f_accumulator_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
-#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_accumulator_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
deleted file mode 100644
index f80d562d4..000000000
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_add_aligned16.h>
-#include <volk/volk_32f_add_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_add_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] + input1[i];
-  }
-  printf("32f_add_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_add_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_add_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h
deleted file mode 100644
index 58e2a151c..000000000
--- a/volk/lib/qa_32f_add_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H
-#define INCLUDED_QA_32F_ADD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_add_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_add_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
deleted file mode 100644
index 5d6987333..000000000
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h>
-#include <cstdlib>
-#include <math.h>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[1] __attribute__ ((aligned (16)));
-  float output01[1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_calc_spectral_noise_floor_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < 1; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
deleted file mode 100644
index c5dce2c4b..000000000
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc
deleted file mode 100644
index 3e2452e68..000000000
--- a/volk/lib/qa_32f_convert_16s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <volk/volk_32f_convert_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_16s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("%d generic... %d, sse... %d sse2... %d\n", i, output_generic[i], output_sse[i], output_sse2[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h
deleted file mode 100644
index fce1eb417..000000000
--- a/volk/lib/qa_32f_convert_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc
deleted file mode 100644
index e016b7ff7..000000000
--- a/volk/lib/qa_32f_convert_16s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <volk/volk_32f_convert_16s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_16s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_16s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_16s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h
deleted file mode 100644
index 492bc80e6..000000000
--- a/volk/lib/qa_32f_convert_16s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc
deleted file mode 100644
index abceb52fb..000000000
--- a/volk/lib/qa_32f_convert_32s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <volk/volk_32f_convert_32s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_32s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_32s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_32s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h
deleted file mode 100644
index 97d854463..000000000
--- a/volk/lib/qa_32f_convert_32s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc
deleted file mode 100644
index 90f84b56f..000000000
--- a/volk/lib/qa_32f_convert_32s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <volk/volk_32f_convert_32s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_32s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_32s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_32s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h
deleted file mode 100644
index 5d662d86d..000000000
--- a/volk/lib/qa_32f_convert_32s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc
deleted file mode 100644
index 1d0754ac9..000000000
--- a/volk/lib/qa_32f_convert_64f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <volk/volk_32f_convert_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h
deleted file mode 100644
index 41eb3e094..000000000
--- a/volk/lib/qa_32f_convert_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc
deleted file mode 100644
index 6f7d5066d..000000000
--- a/volk/lib/qa_32f_convert_64f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <volk/volk_32f_convert_64f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_64f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_64f_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_64f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h
deleted file mode 100644
index 4b144f033..000000000
--- a/volk/lib/qa_32f_convert_64f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc
deleted file mode 100644
index 6a53629b5..000000000
--- a/volk/lib/qa_32f_convert_8s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <volk/volk_32f_convert_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_8s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h
deleted file mode 100644
index 68a523f34..000000000
--- a/volk/lib/qa_32f_convert_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc
deleted file mode 100644
index fbc5c20e6..000000000
--- a/volk/lib/qa_32f_convert_8s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <volk/volk_32f_convert_8s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_8s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_8s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_8s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h
deleted file mode 100644
index 88d4ff42a..000000000
--- a/volk/lib/qa_32f_convert_8s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
deleted file mode 100644
index 3257a3751..000000000
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_divide_aligned16.h>
-#include <volk/volk_32f_divide_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_divide_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] / input1[i];
-  }
-  printf("32f_divide_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_divide_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_divide_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h
deleted file mode 100644
index 79d5ae4b8..000000000
--- a/volk/lib/qa_32f_divide_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
-#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_divide_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc
deleted file mode 100644
index 98c1f2d99..000000000
--- a/volk/lib/qa_32f_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,183 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifdef LV_HAVE_SSE3
-void qa_32f_dot_prod_aligned16::t1() {
-  const int vlen = 2046;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  
-  printf("32f_dot_prod_aligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
-
-  for(i = 0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  
-}
-#else
-void qa_32f_dot_prod_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-#else
-
-void qa_32f_dot_prod_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 4095;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-  float * result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  printf("32f_dot_prod_aligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  //printf("generic: %f ... sse: %f  ... sse3 %f  ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]);
-  for(i =0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h
deleted file mode 100644
index 6931a9e98..000000000
--- a/volk/lib/qa_32f_dot_prod_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc
deleted file mode 100644
index 8e97d4249..000000000
--- a/volk/lib/qa_32f_dot_prod_unaligned16.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifdef LV_HAVE_SSE3
-void qa_32f_dot_prod_unaligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  
-  printf("32f_dot_prod_unaligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
-
-  for(i = 0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  
-}
-#else
-void qa_32f_dot_prod_unaligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-#else
-
-void qa_32f_dot_prod_unaligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 4095;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-  float * result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
-
-  input = &input[1]; // Make sure the buffer is unaligned
-  taps = &taps[1]; // Make sure the buffer is unaligned
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  printf("32f_dot_prod_unaligned16\n");
-  
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  //printf("generic: %f ... sse: %f  ... sse3 %f  ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]);
-  for(i =0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(&input[-1]);
-  free(&taps[-1]);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h
deleted file mode 100644
index e8bad07fe..000000000
--- a/volk/lib/qa_32f_dot_prod_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
-#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
deleted file mode 100644
index a7ae60780..000000000
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <volk/volk_32f_interleave_16sc_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_interleave_16sc_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_interleave_16sc_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  std::complex<int16_t> output_generic[vlen] __attribute__ ((aligned (16)));
-  std::complex<int16_t> output_sse[vlen] __attribute__ ((aligned (16)));
-  std::complex<int16_t> output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-  }
-  printf("32f_interleave_16sc_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01);
-
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h
deleted file mode 100644
index 8d2914817..000000000
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
-#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
deleted file mode 100644
index 333b6fce8..000000000
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <volk/volk_32f_interleave_32fc_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_interleave_32fc_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_interleave_32fc_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  std::complex<float> output_generic[vlen] __attribute__ ((aligned (16)));
-  std::complex<float> output_sse[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-  }
-  printf("32f_interleave_32fc_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h
deleted file mode 100644
index cba518d37..000000000
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
-#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
deleted file mode 100644
index ceb913cb4..000000000
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_max_aligned16.h>
-#include <volk/volk_32f_max_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_max_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_max_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_max_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h
deleted file mode 100644
index d535479f4..000000000
--- a/volk/lib/qa_32f_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H
-#define INCLUDED_QA_32F_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
deleted file mode 100644
index 580a60e7d..000000000
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_min_aligned16.h>
-#include <volk/volk_32f_min_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_min_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_min_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_min_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h
deleted file mode 100644
index 90961ac92..000000000
--- a/volk/lib/qa_32f_min_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H
-#define INCLUDED_QA_32F_MIN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_min_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_min_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
deleted file mode 100644
index 0c242b649..000000000
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <volk/volk_32f_multiply_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_multiply_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] * input1[i];
-  }
-  printf("32f_multiply_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_multiply_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_multiply_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h
deleted file mode 100644
index 7032a2ad4..000000000
--- a/volk/lib/qa_32f_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
deleted file mode 100644
index 1c7b485a6..000000000
--- a/volk/lib/qa_32f_normalize_aligned16.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <volk/volk_32f_normalize_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_normalize_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_normalize_aligned16::t1() {
-  
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 320001;
-  const int ITERS = 100;
-
-  float* output0;
-  float* output01;
-  ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float));
-  ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(float));
-  printf("32f_normalize_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    // printf("%e...%e\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-
-  free(output0);
-  free(output01);
-}
-
-#endif
diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h
deleted file mode 100644
index 7c421eb82..000000000
--- a/volk/lib/qa_32f_normalize_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
-#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_normalize_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc
deleted file mode 100644
index 1b331daeb..000000000
--- a/volk/lib/qa_32f_power_aligned16.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_power_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE
-void qa_32f_power_aligned16::t1() {
-
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 10000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float* input;
-  int i;
-  
-  float* result_generic;
-  float* result_sse;
-  float* result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, vlen *  sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float));
-
-  random_floats((float*)input, vlen);
-
-  const float power = 3;
-  
-  printf("32f_power_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_power_aligned16_manual(result_generic, input, power, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_power_aligned16_manual(result_sse, input, power, vlen,  "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA);
-  }
-
-  free(input);
-  free(result_generic);
-  free(result_sse);
-  
-}
-#else
-void qa_32f_power_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE */
-
diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h
deleted file mode 100644
index d45df4e56..000000000
--- a/volk/lib/qa_32f_power_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H
-#define INCLUDED_QA_32F_POWER_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_power_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_power_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
deleted file mode 100644
index 62d55767a..000000000
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <volk/volk_32f_sqrt_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_sqrt_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  // No reason to test negative numbers because they result in NaN.
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
-    output_known[i] = sqrt(input0[i]);
-  }
-  printf("32f_sqrt_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f\n", input0[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#else
-
-void qa_32f_sqrt_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  // No reason to test negative numbers because they result in NaN.
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
-  }
-  printf("32f_sqrt_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h
deleted file mode 100644
index e4b99d981..000000000
--- a/volk/lib/qa_32f_sqrt_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H
-#define INCLUDED_QA_32F_SQRT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_sqrt_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc
deleted file mode 100644
index 5934d70df..000000000
--- a/volk/lib/qa_32f_stddev_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <volk/volk_32f_stddev_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_stddev_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_stddev_aligned16::t1() {
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-
-  float stddev_generic;
-  float stddev_sse;
-  float stddev_sse4_1;
-  float mean = 0;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    mean += input0[i];
-  }
-  mean /= static_cast<float>(vlen);
-
-  printf("32f_stddev_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  //printf("%d...%d\n", output0[i], output01[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
-
-}
-
-#endif
diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h
deleted file mode 100644
index 7f8d7a5fc..000000000
--- a/volk/lib/qa_32f_stddev_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H
-#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_stddev_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
deleted file mode 100644
index 78c701d78..000000000
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-#include <volk/volk_32f_stddev_and_mean_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_stddev_and_mean_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_stddev_and_mean_aligned16::t1() {
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float stddev_generic;
-  float stddev_sse;
-  float stddev_sse4_1;
-  float mean_generic;
-  float mean_sse;
-  float mean_sse4_1;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_stddev_and_mean_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4);
-
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4);
-
-}
-
-#endif
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h
deleted file mode 100644
index e08bd249a..000000000
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
-#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
deleted file mode 100644
index ffe4b504c..000000000
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <volk/volk_32f_subtract_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_subtract_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_subtract_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_subtract_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h
deleted file mode 100644
index 97c14f129..000000000
--- a/volk/lib/qa_32f_subtract_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
-#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_subtract_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc
deleted file mode 100644
index 494776357..000000000
--- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <math.h>
-
-#define SNR 30.0
-#define CENTER -4.0
-#define CUTOFF -5.595
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 100000
-#define VEC_LEN 64
-static float uniform() {
-  return ((float) rand() / RAND_MAX);	// uniformly (0, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] =  uniform () * -SNR/2.0;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32f_sum_of_poly_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32f_sum_of_poly_aligned16::t1(){
-  int i = 0;
-  
-  volk_environment_init();
-  int ret;
-
-  const int vlen = VEC_LEN;
-  float cutoff = CUTOFF;
-  
-  float* center_point_array;
-  float* target;
-  float* target_generic;
-  float* src0 ;
-
-
-  ret = posix_memalign((void**)&center_point_array, 16, 24);
-  ret = posix_memalign((void**)&target, 16, 4);
-  ret = posix_memalign((void**)&target_generic, 16, 4);
-  ret = posix_memalign((void**)&src0, 16, (vlen << 2));
-  
- 
-  random_floats((float*)src0, vlen);
- 
-  float a = (float)CENTER;
-  float etoa = expf(a);
-  center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 +
-			   (-4.0 * a * a * a)/24.0 + 
-			   (3.0 * a * a)/6.0 +
-			   (-2.0 * a)/2.0 +
-			   (1.0)) * etoa;
-  center_point_array[1] = (//(-10.0 * a * a * a)/120.0 +
-			   (6.0 * a * a)/24.0 + 
-			   (-3.0 * a)/6.0 +
-			   (1.0/2.0)) * etoa;
-  center_point_array[2] = (//(10.0 * a * a)/120.0 +
-			   (-4.0 * a)/24.0 +
-			   (1.0/6.0)) * etoa;
-  center_point_array[3] = (//(-5.0 * a)/120.0 +
-			   (1.0/24.0)) * etoa;
-  //center_point_array[4] = ((1.0)/120.0) * etoa;
-  center_point_array[4] = (//(a * a * a * a * a)/120.0 +
-			   (a * a * a * a)/24.0 +
-			   (a * a * a)/-6.0 +
-			   (a * a)/2.0 +
-			   -a + 1.0) * etoa;
-  
-  printf("32f_sum_of_poly_aligned16\n");
-
-  clock_t start, end;
-  double total;
-  
-  float my_sum = 0.0;
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    float sum = 0.0;
-    for(int l = 0; l < vlen; ++l) {
-      
-      sum += expf(src0[l]);
-      
-    }
-    my_sum = sum;
-  }
-  
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("exp time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    
-    volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic");
-  
-  }
-  
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32f_sum_of_poly_aligned16_manual(target, src0, center_point_array, &cutoff, vlen << 2, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 approx time: %f\n", total);
-
-
-  
-  printf("exp: %f, sse3: %f\n", my_sum, target[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA);
-  
-
-  free(center_point_array);
-  free(target);
-  free(target_generic);
-  free(src0);
-
-  
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h
deleted file mode 100644
index 67a347f9a..000000000
--- a/volk/lib/qa_32f_sum_of_poly_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
-#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
deleted file mode 100644
index 4eba0a3cd..000000000
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE3
-void qa_32fc_32f_multiply_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  float * taps;
-  int i;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float));
-
-  random_floats((float*)input, vlen * 2);
-  random_floats(taps, vlen);
-  
-  printf("32fc_32f_multiply_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-#else
-void qa_32fc_32f_multiply_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h
deleted file mode 100644
index fc3b3eeb2..000000000
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
deleted file mode 100644
index 64ea65da9..000000000
--- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1.5e-3)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE
-void qa_32fc_32f_power_32fc_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 10000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  int i;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse;
-
-  ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float));
-
-  random_floats((float*)input, vlen * 2);
-
-  const float power = 3.2;
-  
-  printf("32fc_32f_power_32fc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen,  "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(result_generic);
-  free(result_sse);
-  
-}
-#else
-void qa_32fc_32f_power_32fc_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE */
-
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
deleted file mode 100644
index 464b7b7cc..000000000
--- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
-#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
deleted file mode 100644
index c55ab5aa0..000000000
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <volk/volk_32fc_atan2_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_atan2_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_atan2_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_atan2_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h
deleted file mode 100644
index 9c4dc209a..000000000
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
deleted file mode 100644
index 2f9a30395..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse");
-
-  printf("32fc_conjugate_dot_prod_aligned16\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#elif LV_HAVE_SSE && LV_HAVE_32
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32");
-
-  printf("32fc_conjugate_dot_prod_aligned16\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#else
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
deleted file mode 100644
index 507b1769b..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 72e084c05..000000000
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
deleted file mode 100644
index 78660e6ad..000000000
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
deleted file mode 100644
index 89770c236..000000000
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32fc_deinterleave_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_generic1[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-  double output_sse21[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
deleted file mode 100644
index f924b9752..000000000
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index 7472476f7..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_real_16s_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index 68b80f27d..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index 5cbdc49b3..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index 765450bb6..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
deleted file mode 100644
index 4147e30ae..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32fc_deinterleave_real_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
deleted file mode 100644
index 3e55fb812..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc
deleted file mode 100644
index bcf9ea954..000000000
--- a/volk/lib/qa_32fc_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#include <stdio.h>
-
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-
-
-#if LV_HAVE_SSE3
-void qa_32fc_dot_prod_aligned16::t1() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_32
-void qa_32fc_dot_prod_aligned16::t2() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_32_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t2() {
-  printf("sse_32 not available... no test performed\n");
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-void qa_32fc_dot_prod_aligned16::t3() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_64_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t3() {
-  printf("sse_64 not available... no test performed\n");
-}
-
-
-
-#endif 
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h
deleted file mode 100644
index 4b360db27..000000000
--- a/volk/lib/qa_32fc_dot_prod_aligned16.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-  void t2 ();
-  void t3 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
deleted file mode 100644
index 16984e30d..000000000
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <volk/volk_32fc_magnitude_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_magnitude_16s_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_magnitude_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_magnitude_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h
deleted file mode 100644
index ffdf1dd9e..000000000
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
-#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
deleted file mode 100644
index b99f1ddcf..000000000
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <volk/volk_32fc_magnitude_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_magnitude_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_magnitude_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_magnitude_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h
deleted file mode 100644
index a2881308c..000000000
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc
deleted file mode 100644
index e1f7eab3d..000000000
--- a/volk/lib/qa_32fc_multiply_aligned16.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-3)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE3
-void qa_32fc_multiply_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  int i;
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float));
-  
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_multiply_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-#else
-void qa_32fc_multiply_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h
deleted file mode 100644
index c8abaa8fe..000000000
--- a/volk/lib/qa_32fc_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
deleted file mode 100644
index 1444c78a9..000000000
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <volk/volk_32fc_power_spectrum_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse3
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_power_spectrum_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_power_spectrum_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  const float scalar = vlen;
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-
-  printf("32fc_power_spectrum_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse33... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
deleted file mode 100644
index d991223f3..000000000
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc
deleted file mode 100644
index d9ead8495..000000000
--- a/volk/lib/qa_32fc_square_dist_aligned16.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 10000000
-#define VEC_LEN 64
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_square_dist_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_square_dist_aligned16::t1(){
-  int i = 0;
-  
-  const int vlen = VEC_LEN;
-  volk_environment_init();
-  int ret;
-  
-  float* target;
-  float* target_generic;
-  std::complex<float>* src0 ;
-  std::complex<float>* points;
-
-  ret = posix_memalign((void**)&points, 16, vlen << 3);
-  ret = posix_memalign((void**)&target, 16, vlen << 2);
-  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
-  ret = posix_memalign((void**)&src0, 16, 8);
-  
-  random_floats((float*)points, vlen * 2);
-  random_floats((float*)src0, 2);
-  
-  printf("32fc_square_dist_aligned16\n");
-  
-  clock_t start, end;
-  double total;
-  
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic");
-  }
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-  volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-  
-  
-  for(; i < vlen; ++i) {
-    //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA);
-  }
-
-  free(target);
-  free(target_generic);
-  free(points);
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h
deleted file mode 100644
index 9d365d8b0..000000000
--- a/volk/lib/qa_32fc_square_dist_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
-#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
deleted file mode 100644
index f923d1d5c..000000000
--- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define ERR_DELTA .0001
-#define NUM_ITERS 10000000
-#define VEC_LEN 64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
-  int i = 0;
-  
-  const int vlen = VEC_LEN;
-  
-  volk_environment_init();
-  int ret;
-  
-  float* target;
-  float* target_generic;
-  std::complex<float>* src0 ;
-  std::complex<float>* points;
-  float scalar;
-
-  ret = posix_memalign((void**)&points, 16, vlen << 3);
-  ret = posix_memalign((void**)&target, 16, vlen << 2);
-  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
-  ret = posix_memalign((void**)&src0, 16, 8);
-  
-  random_floats((float*)points, vlen * 2);
-  random_floats((float*)src0, 2);
-  random_floats(&scalar, 1);
-  
-  printf("32fc_square_dist_scalar_mult_aligned16\n");
-  
-  clock_t start, end;
-  double total;
-  
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic");
-  }
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 3, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-  
-  
-  for(i = 0; i < vlen; ++i) {
-    printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA);
-  }
-
-  free(target);
-  free(target_generic);
-  free(points);
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
deleted file mode 100644
index ac4e3c45b..000000000
--- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
deleted file mode 100644
index 661801709..000000000
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_and_aligned16.h>
-#include <volk/volk_32s_and_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32s_and_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_and_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  int32_t input1[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output0[vlen] __attribute__ ((aligned (16)));
-  int32_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-  }
-  printf("32s_and_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h
deleted file mode 100644
index dfcb47c63..000000000
--- a/volk/lib/qa_32s_and_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H
-#define INCLUDED_QA_32S_AND_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_and_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_and_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc
deleted file mode 100644
index 07d799809..000000000
--- a/volk/lib/qa_32s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <volk/volk_32s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32s_convert_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_convert_32f_aligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("32s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h
deleted file mode 100644
index efd2a2eea..000000000
--- a/volk/lib/qa_32s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc
deleted file mode 100644
index 2ec610ffb..000000000
--- a/volk/lib/qa_32s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <volk/volk_32s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32s_convert_32f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_convert_32f_unaligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("32s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h
deleted file mode 100644
index 5006f5fd8..000000000
--- a/volk/lib/qa_32s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
deleted file mode 100644
index 9da2ae344..000000000
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_or_aligned16.h>
-#include <volk/volk_32s_or_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32s_or_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_or_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  int32_t input1[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output0[vlen] __attribute__ ((aligned (16)));
-  int32_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-  }
-  printf("32s_or_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h
deleted file mode 100644
index 9e949eb52..000000000
--- a/volk/lib/qa_32s_or_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H
-#define INCLUDED_QA_32S_OR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_or_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_or_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */
diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc
deleted file mode 100644
index 313c786b6..000000000
--- a/volk/lib/qa_32u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <volk/volk_32u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint32_t output0[vlen] __attribute__ ((aligned (16)));
-  uint32_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint32_t));
-  printf("32u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h
deleted file mode 100644
index 47bad4c3d..000000000
--- a/volk/lib/qa_32u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc
deleted file mode 100644
index 7f9c4584a..000000000
--- a/volk/lib/qa_64f_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <volk/volk_64f_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_convert_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_convert_32f_aligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  double input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h
deleted file mode 100644
index 95d79f73d..000000000
--- a/volk/lib/qa_64f_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc
deleted file mode 100644
index 98aadbf4d..000000000
--- a/volk/lib/qa_64f_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <volk/volk_64f_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_convert_32f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_convert_32f_unaligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  double input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h
deleted file mode 100644
index 430327e81..000000000
--- a/volk/lib/qa_64f_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc
deleted file mode 100644
index 76e755514..000000000
--- a/volk/lib/qa_64f_max_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_max_aligned16.h>
-#include <volk/volk_64f_max_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_max_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_max_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  double input0[vlen] __attribute__ ((aligned (16)));
-  double input1[vlen] __attribute__ ((aligned (16)));
-  
-  double output0[vlen] __attribute__ ((aligned (16)));
-  double output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_max_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h
deleted file mode 100644
index 7cbd4d4c1..000000000
--- a/volk/lib/qa_64f_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H
-#define INCLUDED_QA_64F_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc
deleted file mode 100644
index 4b70d2881..000000000
--- a/volk/lib/qa_64f_min_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_min_aligned16.h>
-#include <volk/volk_64f_min_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_min_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_min_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  double input0[vlen] __attribute__ ((aligned (16)));
-  double input1[vlen] __attribute__ ((aligned (16)));
-  
-  double output0[vlen] __attribute__ ((aligned (16)));
-  double output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_min_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h
deleted file mode 100644
index a0e95395f..000000000
--- a/volk/lib/qa_64f_min_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H
-#define INCLUDED_QA_64F_MIN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_min_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_min_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc
deleted file mode 100644
index 20d012c9e..000000000
--- a/volk/lib/qa_64u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <volk/volk_64u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint64_t output0[vlen] __attribute__ ((aligned (16)));
-  uint64_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint64_t));
-  printf("64u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h
deleted file mode 100644
index a4fa0c983..000000000
--- a/volk/lib/qa_64u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc
deleted file mode 100644
index 8dd5f76ca..000000000
--- a/volk/lib/qa_8s_convert_16s_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <volk/volk_8s_convert_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4_1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_16s_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_16s_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h
deleted file mode 100644
index 38739fc96..000000000
--- a/volk/lib/qa_8s_convert_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc
deleted file mode 100644
index 12c502d4b..000000000
--- a/volk/lib/qa_8s_convert_16s_unaligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <volk/volk_8s_convert_16s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4_1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_16s_unaligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_16s_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_16s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h
deleted file mode 100644
index d39fffc35..000000000
--- a/volk/lib/qa_8s_convert_16s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
deleted file mode 100644
index 672f5662f..000000000
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <volk/volk_8s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4.1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_32f_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h
deleted file mode 100644
index 7f8401d42..000000000
--- a/volk/lib/qa_8s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc
deleted file mode 100644
index 43468b1b1..000000000
--- a/volk/lib/qa_8s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <volk/volk_8s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4.1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_32f_unaligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_32f_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen+1] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen+1] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen+1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%e...%e\n", output_generic[i], output_sse4_1[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h
deleted file mode 100644
index aad2f8c22..000000000
--- a/volk/lib/qa_8s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
deleted file mode 100644
index 94e63e37d..000000000
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_deinterleave_16s_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_16s_aligned16::t1() {
-
-  
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_11[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse4_11[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
deleted file mode 100644
index 9c99fed70..000000000
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 29073eed7..000000000
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <volk/volk_8sc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* LV_HAVE_SSE */
-
-#else
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-  float output_sse14_1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max<double>((output_generic[i])*1e-4, 1e-4));
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-  }
-}
-
-
-#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
deleted file mode 100644
index 63b5fdadb..000000000
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index 4980c982a..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_deinterleave_real_16s_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_16s_aligned16::t1() {
-
-  
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index 02050926f..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index 3c3f737a1..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* LV_HAVE_SSE */
-
-#else
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> *input0;
-  
-  float* output_generic;
-  float* output_sse;
-  float* output_sse4_1;
-
-  ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t));
-  ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0);
-  }
-
-  printf("8sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-
-  free(input0);
-  free(output_generic);
-  free(output_sse);
-  free(output_sse4_1);
-}
-
-#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index 93338e488..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
deleted file mode 100644
index a33d1bf30..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_8sc_deinterleave_real_8s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
deleted file mode 100644
index 92fc0dd4a..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
deleted file mode 100644
index 216bf1cef..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctime>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<int8_t>* input;
-  std::complex<int8_t>* taps;
-  
-  std::complex<int16_t>* result_generic;
-  std::complex<int16_t>* result_sse4_1;
-  int i;
-  int8_t* inputInt8_T;
-  int8_t* tapsInt8_T;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t));
-  
-  inputInt8_T = (int8_t*)input;
-  tapsInt8_T = (int8_t*)taps;
-  for(int i = 0; i < vlen*2; ++i) {   
-    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  
-  printf("8sc_multiply_conjugate_16sc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex<int16_t>*)result_generic, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex<int16_t>*)result_sse4_1, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i]));
-
-    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
deleted file mode 100644
index 0e78a5eca..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
-#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
deleted file mode 100644
index 4c707446e..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctime>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<int8_t>* input;
-  std::complex<int8_t>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse4_1;
-  int i;
-  int8_t* inputInt8_T;
-  int8_t* tapsInt8_T;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float));
-  
-
-  inputInt8_T = (int8_t*)input;
-  tapsInt8_T = (int8_t*)taps;
-  for(int i = 0; i < vlen*2; ++i) {   
-    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  
-  printf("8sc_multiply_conjugate_32fc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i]));
-    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
deleted file mode 100644
index eb9ae309c..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
-#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
new file mode 100644
index 000000000..e85e2c1bc
--- /dev/null
+++ b/volk/lib/qa_utils.cc
@@ -0,0 +1,447 @@
+#include "qa_utils.h"
+#include <stdlib.h>
+#include <boost/foreach.hpp>
+#include <boost/assign/list_of.hpp>
+#include <boost/tokenizer.hpp>
+//#include <boost/test/unit_test.hpp>
+#include <iostream>
+#include <vector>
+#include <time.h>
+#include <math.h>
+#include <boost/lexical_cast.hpp>
+//#include <volk/volk_runtime.h>
+#include <volk/volk_registry.h>
+#include <volk/volk.h>
+#include <boost/typeof/typeof.hpp>
+#include <boost/type_traits.hpp>
+
+float uniform() { //pseudo-random float derived from a single rand() call
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// rand()/RAND_MAX spans [0, 1], so the result spans [-1, 1] inclusive
+}
+
+template <class t> //t is the element type of buf; load_random_data instantiates it with float and double
+void random_floats (t *buf, unsigned n) //overwrite buf[0..n-1] with values from uniform()
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform (); //the float result is converted to t on assignment
+}
+
+//Fill data with n random elements of `type` (2*n values when the type is complex): floats via random_floats, integers spread over the type's whole range.
+void load_random_data(void *data, volk_type_t type, unsigned int n) {
+    if(type.is_complex) n *= 2;
+    if(type.is_float) {
+        if(type.size == 8) random_floats<double>((double *)data, n);
+        else random_floats<float>((float *)data, n);
+    } else {
+        //signed types span [-2^(bits-1), 2^(bits-1)), unsigned ones [0, 2^bits); the old "2 << bits" scale overshot both and shifted by 64 bits for 8-byte types
+        const double span = ldexp(1.0, type.size*8 - (type.is_signed ? 1 : 0));
+        for(unsigned int i=0; i<n; i++) {
+            const double unit = rand() / (RAND_MAX + 1.0); //in [0, 1)
+            const double scaled_rand = floor((type.is_signed ? 2.0*unit - 1.0 : unit) * span); //always inside the target range, so the casts below are well defined
+            switch(type.size) { //you have to cast down at some point
+            case 8:
+                if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
+                else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
+            break;
+            case 4:
+                if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
+                else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
+            break;
+            case 2:
+                if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
+                else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
+            break;
+            case 1:
+                if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
+                else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
+            break;
+            default: throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
+            }
+        }
+    }
+}
+
+//Allocate a zero-filled, 16-byte-aligned buffer of len*size bytes; the caller releases it with free(). Throws if the allocation fails.
+void *make_aligned_buffer(unsigned int len, unsigned int size) {
+  void *buf = NULL;
+  //checked explicitly instead of with assert(): under NDEBUG the assert vanished and memset ran on an uninitialised pointer
+  if(posix_memalign(&buf, 16, len * size) != 0) throw "make_aligned_buffer: posix_memalign failed";
+  memset(buf, 0x00, len*size);
+  return buf;
+}
+
+//Append to buffs one zeroed, aligned vlen-element buffer for every non-scalar entry of inputsig, in signature order.
+void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) {
+    for(size_t k = 0; k < inputsig.size(); k++)
+        if(!inputsig[k].is_scalar) //scalar entries get no buffer
+            buffs.push_back(make_aligned_buffer(vlen, (inputsig[k].is_complex ? 2 : 1) * inputsig[k].size));
+}
+
+static std::vector<std::string> get_arch_list(const int archs[]) { //translate an arch array into the arch names it lists, in array order
+    std::vector<std::string> archlist;
+    int num_archs = archs[0]; //element 0 holds the count; the arch values are read from archs[1..num_archs]
+    
+    //there has got to be a way to query these arches
+    for(int i = 0; i < num_archs; i++) {
+        switch(archs[i+1]) { //each case matches exactly one bit, 1 << LV_<arch>
+        case (1<<LV_GENERIC):
+            archlist.push_back("generic");
+            break;
+        case (1<<LV_ORC):
+            archlist.push_back("orc");
+            break;
+        case (1<<LV_SSE):
+            archlist.push_back("sse");
+            break;
+        case (1<<LV_SSE2):
+            archlist.push_back("sse2");
+            break;
+        case (1<<LV_SSE3):
+            archlist.push_back("sse3");
+            break;
+        case (1<<LV_SSSE3):
+            archlist.push_back("ssse3");
+            break;
+        case (1<<LV_SSE4_1):
+            archlist.push_back("sse4_1");
+            break;
+        case (1<<LV_SSE4_2):
+            archlist.push_back("sse4_2");
+            break;
+        case (1<<LV_SSE4_A):
+            archlist.push_back("sse4_a");
+            break;
+        case (1<<LV_MMX):
+            archlist.push_back("mmx");
+            break;
+        case (1<<LV_AVX):
+            archlist.push_back("avx");
+            break;
+        default: //a value matching none of the cases above is silently left out of the list
+            break;
+        }
+    }
+    return archlist;
+}
+
+//Parse one kernel-name token such as "32fc", "16i" or "s32f" into a volk_type_t: an optional leading 's' (scalar), the width
+//in bits, then any of f (float), i (signed), c (complex), u (unsigned). Throws when the token cannot be read as a type.
+volk_type_t volk_type_from_string(std::string name) {
+    volk_type_t type;
+    type.is_float = false;
+    type.is_scalar = false;
+    type.is_complex = false;
+    type.is_signed = false;
+    type.size = 0;
+    type.str = name;
+    
+    if(name.size() < 2) throw std::string("name too short to be a datatype");
+    //is it a scalar?
+    if(name[0] == 's') { 
+        type.is_scalar = true;
+        name = name.substr(1, name.size()-1);
+    }
+    
+    //get the data size
+    const std::string::size_type last_size_pos = name.find_last_of("0123456789");
+    if(last_size_pos == std::string::npos) throw std::string("no size spec in type ").append(name);
+    //will throw if malformed
+    int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
+    //thrown rather than asserted: callers classify tokens by catching exceptions, and assert() disappears under NDEBUG
+    if(((size % 8) != 0) || (size > 64) || (size <= 0)) throw std::string("unsupported size in type ").append(name);
+    type.size = size/8; //in bytes
+    
+    for(std::string::size_type i=last_size_pos+1; i < name.size(); i++) {
+        switch (name[i]) {
+        case 'f':
+            type.is_float = true;
+            break;
+        case 'i':
+            type.is_signed = true;
+            break;
+        case 'c':
+            type.is_complex = true;
+            break;
+        case 'u':
+            type.is_signed = false;
+            break;
+        default: //a bare "throw;" here had no active exception to rethrow and would have called std::terminate
+            throw std::string("unknown qualifier in type ").append(name);
+        }
+    }
+    return type;
+}
+
+static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, //split a kernel name on '_' and append the types found before/after the function name
+                                   std::vector<volk_type_t> &outputsig, //to inputsig/outputsig respectively
+                                   std::string name) {
+    boost::char_separator<char> sep("_");
+    boost::tokenizer<boost::char_separator<char> > tok(name, sep);
+    std::vector<std::string> toked;
+    tok.assign(name);
+    toked.assign(tok.begin(), tok.end());
+    
+    assert(toked[0] == "volk"); //every kernel name must start with the "volk" token, which is then dropped
+    toked.erase(toked.begin());
+
+    //ok. we're assuming a string in the form
+    //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
+
+    enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
+    std::string fn_name; //only appended to below; nothing in this function reads it back
+    volk_type_t type;
+    BOOST_FOREACH(std::string token, toked) {
+        try {
+            type = volk_type_from_string(token); //throws for anything that is not a type; the catch block then classifies the token
+            if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
+            
+            if(side == SIDE_INPUT) inputsig.push_back(type);
+            else outputsig.push_back(type);
+        } catch (...){
+            if(token[0] == 'x') { //it's a multiplier
+                if(side == SIDE_INPUT) assert(inputsig.size() > 0);
+                else assert(outputsig.size() > 0);
+                int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
+                for(int i=1; i<multiplier; i++) { //starts at 1: the type is already present once, so add multiplier-1 more copies
+                    if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
+                    else outputsig.push_back(outputsig.back());
+                }
+            }
+            else if(side == SIDE_INPUT) { //it's the function name, at least it better be
+                side = SIDE_NAME; //further non-type tokens seen while in SIDE_NAME match no branch here and are ignored
+                fn_name.append("_");
+                fn_name.append(token);
+            } 
+            else if(side == SIDE_OUTPUT) {
+                if(token != toked.back()) throw; //the last token in the name is the alignment
+            }
+        }
+    }
+    //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
+    assert(inputsig.size() != 0);
+}
+
+inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { //call func iter times on buffs[0] for the named arch
+    while(iter--) func(buffs[0], vlen, arch.c_str());
+}
+
+inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { //call func iter times on buffs[0..1] for the named arch
+    while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
+}
+
+inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { //call func iter times on buffs[0..2] for the named arch
+    while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
+}
+
+inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { //call func iter times on buffs[0..3] for the named arch
+    while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
+}
+
+inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { //as run_cast_test1, with the float scalar passed after the buffer
+    while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { //as run_cast_test2, with the float scalar passed after the buffers
+    while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { //as run_cast_test3, with the float scalar passed after the buffers
+    while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
+}
+
+template <class t>
+bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    bool fail = false;
+    int print_max_errs = 10;
+    for(int i=0; i<vlen; i++) {
+        if(((t *)(in1))[i] < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision
+        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
+            fail=true;
+            if(print_max_errs-- > 0) {
+                std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl;
+            }
+        }
+    }
+    
+    return fail;
+}
+
+//Absolute-difference comparison of two integer vectors. Returns true if any pair of elements differs by more than tol.
+template <class t>
+bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
+    bool fail = false;
+    int print_max_errs = 10; //report at most this many mismatches
+    for(unsigned int i=0; i<vlen; i++) { //differences are taken as larger-minus-smaller in uint64_t: abs(in1-in2) wrapped for unsigned t and could overflow for 32/64-bit t
+        if((in1[i] > in2[i] ? uint64_t(in1[i]) - uint64_t(in2[i]) : uint64_t(in2[i]) - uint64_t(in1[i])) > tol) {
+            fail=true;
+            if(print_max_errs-- > 0) {
+                std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl;
+            }
+        }
+    }
+    return fail;
+}
+
+//Run the kernel `name` once per architecture listed in archs (through manual_func, the kernel's _manual entry point) on
+//identical random inputs, print the time taken by iter calls of each, and compare every buffer against the "generic" run.
+//Returns true if any architecture differs by more than tol; returns false when fewer than two architectures are listed.
+bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
+    std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
+    
+    //first let's get a list of available architectures for the test
+    std::vector<std::string> arch_list = get_arch_list(archs);
+    
+    if(arch_list.size() < 2) {
+        std::cout << "no architectures to test" << std::endl;
+        return false;
+    }
+    
+    //now we have to get a function signature by parsing the name
+    std::vector<volk_type_t> inputsig, outputsig;
+    get_signatures_from_name(inputsig, outputsig, name);
+    
+    //pull the input scalars into their own vector
+    std::vector<volk_type_t> inputsc;
+    for(size_t i=0; i<inputsig.size(); ) {
+        if(inputsig[i].is_scalar) {
+            inputsc.push_back(inputsig[i]);
+            inputsig.erase(inputsig.begin() + i); //erase moves the next element into slot i, so i must not advance here
+        } else i++;
+    }
+
+    std::vector<void *> inbuffs;
+    std::vector<void *> free_buffs; //this is just a list of void*'s that i'll have to free later.
+                                    //we need it because we dupe void*s in test_data below.
+    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    for(int i=0; i<inbuffs.size(); i++) {
+        load_random_data(inbuffs[i], inputsig[i], vlen);
+        free_buffs.push_back(inbuffs[i]);
+    }
+    
+    //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
+    std::vector<std::vector<void *> > test_data;
+    for(int i=0; i<arch_list.size(); i++) {
+        std::vector<void *> arch_buffs;
+        for(int j=0; j<outputsig.size(); j++) {
+            arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
+            free_buffs.push_back(arch_buffs.back());
+        }
+        for(int j=0; j<inputsig.size(); j++) {
+            arch_buffs.push_back(inbuffs[j]);
+        }
+        test_data.push_back(arch_buffs);
+    }
+    
+    std::vector<volk_type_t> both_sigs;
+    both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
+    both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
+
+    //now run the test
+    clock_t start, end;
+    for(int i = 0; i < arch_list.size(); i++) {
+        start = clock();
+
+        switch(both_sigs.size()) {
+            case 1:
+                if(inputsc.size() == 0) {
+                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                } else throw "unsupported 1 arg function >1 scalars";
+                break;
+            case 2:
+                if(inputsc.size() == 0) {
+                    run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                } else throw "unsupported 2 arg function >1 scalars";
+                break;
+            case 3:
+                if(inputsc.size() == 0) {
+                    run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+                } else throw "unsupported 3 arg function >1 scalars";
+                break;
+            case 4:
+                run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
+                break;
+            default:
+                throw "no function handler for this signature";
+                break;
+        }
+        
+        end = clock();
+        std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
+    }
+    //and now compare each output to the generic output
+    //first we have to know which output is the generic one, they aren't in order...
+    int generic_offset=0;
+    for(int i=0; i<arch_list.size(); i++) 
+        if(arch_list[i] == "generic") generic_offset=i;
+
+    //now compare
+    
+    bool fail = false;
+    bool fail_global = false;
+    for(int i=0; i<arch_list.size(); i++) {
+        if(i != generic_offset) {
+            for(int j=0; j<both_sigs.size(); j++) {
+                if(both_sigs[j].is_float) {
+                    if(both_sigs[j].size == 8) {
+                        fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                    }
+                } else {
+                    //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
+                    switch(both_sigs[j].size) {
+                    case 8:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 4:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 2:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 1:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    default:
+                        fail=1;
+                    }
+                }
+                if(fail) {
+                    fail_global = true;
+                    std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
+                }
+                //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
+            }
+        }
+    }
+
+    BOOST_FOREACH(void *buf, free_buffs) {
+        free(buf);
+    }
+
+    return fail_global;
+}
+
+
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
new file mode 100644
index 000000000..e2539060a
--- /dev/null
+++ b/volk/lib/qa_utils.h
@@ -0,0 +1,33 @@
+#ifndef VOLK_QA_UTILS_H
+#define VOLK_QA_UTILS_H
+
+#include <stdlib.h>
+#include <string>
+
+struct volk_type_t { //one operand type of a kernel, as parsed from a name token by volk_type_from_string
+    bool is_float; //set by an 'f' qualifier
+    bool is_scalar; //set when the token starts with 's'
+    bool is_signed; //set by an 'i' qualifier, cleared by 'u'
+    bool is_complex; //set by a 'c' qualifier; buffers are then sized for two values per element
+    int size; //width of one value in bytes
+    std::string str; //the token this type was parsed from
+};
+
+volk_type_t volk_type_from_string(std::string); //parses one name token into a volk_type_t; throws on tokens it cannot read as a type
+//NOTE(review): qa_utils.cc defines random_floats as a template, so the non-template prototype below has no matching definition there
+float uniform(void);
+void random_floats(float *buf, unsigned n);
+//arguments in order: arch array, _manual function, kernel name, tolerance, scalar argument, vector length, iterations; true means a mismatch
+bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int);
+
+#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0)
+
+typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
+typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); //two buffers, then vector length and arch name
+typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); //three buffers, then vector length and arch name
+typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); //four buffers, then vector length and arch name
+typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
+typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*); //two buffers, one scalar float, then vector length and arch name
+typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*); //three buffers, one scalar float, then vector length and arch name
+
+#endif //VOLK_QA_UTILS_H
diff --git a/volk/lib/qa_volk.h b/volk/lib/qa_volk.h
deleted file mode 100644
index 43fa7faba..000000000
--- a/volk/lib/qa_volk.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Example Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Example Public License for more details.
- * 
- * You should have received a copy of the GNU Example Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_QA_VOLK_H
-#define INCLUDED_QA_VOLK_H
-
-#include <cppunit/TestSuite.h>
-
-//! collect all the tests for the example directory
-
-class qa_volk {
- public:
-  //! return suite of tests for all of example directory
-  static CppUnit::TestSuite *suite ();
-};
-
-#endif /* INCLUDED_QA_VOLK_H */
diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc
deleted file mode 100644
index 50ac08eab..000000000
--- a/volk/lib/test_all.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2002,2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <cppunit/ui/text/TestRunner.h>
-#include <cppunit/TextTestRunner.h>
-
-#include <qa_volk.h>
-
-#include <cppunit/XmlOutputter.h>
-#include <iostream>
-#include <getopt.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string>
-#include <fstream>
-
-int 
-main (int argc, char **argv)
-{
-  
-  int opt = 0;
-  std::string xmlOutputFile("");
-
-  while( (opt = getopt(argc, argv, "o:")) != -1){
-    switch(opt){
-    case 'o':
-      if(optarg){
-	xmlOutputFile.assign(optarg);
-      }
-      else{
-	std::cerr << "No xml file output specified for -o" << std::endl;
-	exit(EXIT_FAILURE);
-      }
-      break;
-
-    default: /* '?' */
-      fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n",
-	      argv[0]);
-      exit(EXIT_FAILURE);
-    }
-
-  }
-
-  CppUnit::TextUi::TestRunner runner;
-
-  runner.addTest (qa_volk::suite ());
-
-  bool was_successful = false;
-  if(!xmlOutputFile.empty()){
-    std::ofstream xmlOutput(xmlOutputFile.c_str());
-    if(xmlOutput.is_open()){
-      runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput));
-
-      was_successful = runner.run("", false, true, false);
-    }
-    xmlOutput.close();
-  }
-  else{
-    was_successful = runner.run ("", false);
-  }
-
-  return was_successful ? 0 : 1;
-}
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
new file mode 100644
index 000000000..f33670856
--- /dev/null
+++ b/volk/lib/testqa.cc
@@ -0,0 +1,103 @@
+#include "qa_utils.h"
+#include <volk/volk.h>
+#include <volk/volk_registry.h>
+#include <boost/test/unit_test.hpp>
+
+// Runs VOLK_RUN_TESTS (a macro this file gets from its includes) once per kernel
+// inside a single Boost.Test case. The numeric arguments appear to be: comparison
+// tolerance, scalar for the s32f kernels (0 elsewhere), vector length and
+// iteration count -- TODO confirm against the macro definition in qa_utils.h.
+// Commented-out entries are kernels that are not exercised.
+BOOST_AUTO_TEST_CASE(volk_test_all) {
+    //in order...
+//    VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
+    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000);
+    // 2^31 is written as a floating-point literal: an int shift expression such
+    // as 2<<31 does not fit in a 32-bit int.
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2147483648.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2147483648.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
+
+}
diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am
new file mode 100644
index 000000000..6b5e4f8b6
--- /dev/null
+++ b/volk/orc/Makefile.am
@@ -0,0 +1,59 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+# Builds libvolk_orc.la from ORC kernel descriptions. Each .orc source is
+# turned into a .c file by $(ORCC) through the suffix rule at the end.
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(ORC_CFLAGS)
+
+include $(top_srcdir)/Makefile.common
+
+lib_LTLIBRARIES = libvolk_orc.la
+libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS)
+
+# One .orc file per kernel, grouped by input element type.
+libvolk_orc_la_SOURCES = \
+	volk_8i_convert_16i_a16_orc_impl.orc \
+	volk_8i_s32f_convert_32f_a16_orc_impl.orc \
+	volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc \
+	volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc \
+	volk_16ic_deinterleave_real_8i_a16_orc_impl.orc \
+	volk_16ic_magnitude_16i_a16_orc_impl.orc \
+	volk_16u_byteswap_a16_orc_impl.orc \
+	volk_32f_s32f_normalize_a16_orc_impl.orc \
+	volk_32f_sqrt_32f_a16_orc_impl.orc \
+	volk_32f_x2_add_32f_a16_orc_impl.orc \
+	volk_32f_x2_divide_32f_a16_orc_impl.orc \
+	volk_32f_x2_max_32f_a16_orc_impl.orc \
+	volk_32f_x2_min_32f_a16_orc_impl.orc \
+	volk_32f_x2_multiply_32f_a16_orc_impl.orc \
+	volk_32f_x2_subtract_32f_a16_orc_impl.orc \
+	volk_32fc_32f_multiply_32fc_a16_orc_impl.orc \
+	volk_32fc_magnitude_32f_a16_orc_impl.orc \
+	volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc \
+	volk_32fc_x2_multiply_32fc_a16_orc_impl.orc \
+	volk_32i_x2_and_32i_a16_orc_impl.orc \
+	volk_32i_x2_or_32i_a16_orc_impl.orc
+
+# orcc is always run with --implementation, followed by any $(ORCC_FLAGS).
+my_ORCC_FLAGS = --implementation $(ORCC_FLAGS)
+
+# Suffix rule: generate foo.c from foo.orc.
+.orc.c:
+	$(ORCC) $(my_ORCC_FLAGS) -o $@ $<
diff --git a/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc
new file mode 100644
index 000000000..0189fbf5d
--- /dev/null
+++ b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc
@@ -0,0 +1,15 @@
+# Converts interleaved 16-bit pairs to float, divides both halves by `scalar`,
+# and writes the two halves to separate destinations.
+.function volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl
+.dest 4 idst
+.dest 4 qdst
+.source 4 src
+.floatparam 4 scalar
+.temp 8 pairl
+.temp 8 pairf
+
+# widen 16 -> 32 bit, then int -> float, on both halves at once (x2)
+x2 convswl pairl, src
+x2 convlf pairf, pairl
+x2 divf pairf, pairf, scalar
+splitql qdst, idst, pairf
diff --git a/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc
new file mode 100644
index 000000000..56018edda
--- /dev/null
+++ b/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc
@@ -0,0 +1,7 @@
+# Splits each 32-bit source element into its two 16-bit halves with a single
+# splitlw, writing one half to qdst and the other to idst.
+.function volk_16ic_deinterleave_16i_x2_a16_orc_impl
+.dest 2 idst
+.dest 2 qdst
+.source 4 src
+splitlw qdst, idst, src
diff --git a/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc
new file mode 100644
index 000000000..dba9a4c8e
--- /dev/null
+++ b/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc
@@ -0,0 +1,8 @@
+# Keeps one 16-bit word of every 32-bit source element (select0lw) and
+# narrows it to 8 bits by taking its upper byte (convhwb).
+.function volk_16ic_deinterleave_real_8i_a16_orc_impl
+.dest 1 dst
+.source 4 src
+.temp 2 firstw
+select0lw firstw, src
+convhwb dst, firstw
diff --git a/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc
new file mode 100644
index 000000000..37225e9b8
--- /dev/null
+++ b/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc
@@ -0,0 +1,25 @@
+# 16-bit complex magnitude: widen to float, divide by `scalar`, take
+# sqrt(i^2 + q^2), multiply by `scalar` again and narrow to 16 bits.
+.function volk_16ic_magnitude_16i_a16_orc_impl
+.source 4 src
+.dest 2 dst
+.floatparam 4 scalar
+.temp 8 pairl
+.temp 8 pairf
+.temp 8 squares
+.temp 4 qsq
+.temp 4 isq
+.temp 4 sumsq
+.temp 4 magf
+.temp 4 magl
+
+x2 convswl pairl, src
+x2 convlf pairf, pairl
+x2 divf pairf, pairf, scalar
+x2 mulf squares, pairf, pairf
+splitql qsq, isq, squares
+addf sumsq, isq, qsq
+sqrtf magf, sumsq
+mulf magf, magf, scalar
+convfl magl, magf
+convlw dst, magl
diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
new file mode 100644
index 000000000..1e2380837
--- /dev/null
+++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
@@ -0,0 +1,26 @@
+# 16-bit complex magnitude with float output: both 16-bit components are
+# widened to float, divided by `scalar`, squared and summed; dst receives the
+# square root of that sum.
+.function volk_16ic_magnitude_32f_a16_orc_impl
+.source 4 src
+.dest 4 dst
+.floatparam 4 scalar
+.temp 4 al
+.temp 4 bl
+.temp 2 aw
+.temp 2 bw
+.temp 4 af
+.temp 4 bf
+.temp 4 sumsq
+
+splitlw aw, bw, src
+convswl al, aw
+convswl bl, bw
+convlf af, al
+convlf bf, bl
+divf af, af, scalar
+divf bf, bf, scalar
+mulf af, af, af
+mulf bf, bf, bf
+addf sumsq, af, bf
+sqrtf dst, sumsq
diff --git a/volk/orc/volk_16u_byteswap_a16_orc_impl.orc b/volk/orc/volk_16u_byteswap_a16_orc_impl.orc
new file mode 100644
index 000000000..c1c8ee59e
--- /dev/null
+++ b/volk/orc/volk_16u_byteswap_a16_orc_impl.orc
@@ -0,0 +1,5 @@
+# In-place byte swap: swapw reads each 16-bit element of dst and writes the
+# swapped value back to the same array.
+.function volk_16u_byteswap_a16_orc_impl
+.dest 2 dst
+swapw dst, dst
diff --git a/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc b/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc
new file mode 100644
index 000000000..acd319b16
--- /dev/null
+++ b/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc
@@ -0,0 +1,7 @@
+# Multiplies every element of src1 by the float parameter invscalar.
+# NOTE(review): the parameter name suggests callers pass a reciprocal -- confirm.
+.function volk_32f_s32f_normalize_a16_orc_impl
+.source 4 src1
+.floatparam 4 invscalar
+.dest 4 dst
+mulf dst, src1, invscalar
diff --git a/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc b/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..ae5680f15
--- /dev/null
+++ b/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc
@@ -0,0 +1,5 @@
+# Element-wise float square root of src, stored in dst.
+.function volk_32f_sqrt_32f_a16_orc_impl
+.source 4 src
+.dest 4 dst
+sqrtf dst, src
diff --git a/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..8d095a052
--- /dev/null
+++ b/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise float addition: dst = src1 + src2.
+.function volk_32f_x2_add_32f_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+addf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..0097646cb
--- /dev/null
+++ b/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise float division: dst = src1 / src2.
+.function volk_32f_x2_divide_32f_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+divf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..b7f008737
--- /dev/null
+++ b/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise float maximum of src1 and src2, stored in dst.
+.function volk_32f_x2_max_32f_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+maxf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..78328b576
--- /dev/null
+++ b/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise float minimum of src1 and src2, stored in dst.
+.function volk_32f_x2_min_32f_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+minf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..e8fadff19
--- /dev/null
+++ b/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise float multiplication: dst = src1 * src2.
+.function volk_32f_x2_multiply_32f_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+mulf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..13fbe8c83
--- /dev/null
+++ b/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise float subtraction: dst = src1 - src2.
+.function volk_32f_x2_subtract_32f_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+subf dst, src1, src2
diff --git a/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc
new file mode 100644
index 000000000..455293cff
--- /dev/null
+++ b/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc
@@ -0,0 +1,9 @@
+# Complex-by-real multiply: the 32-bit real factor is duplicated into both
+# halves of a 64-bit temporary so one x2 mulf scales both components of src1.
+.function volk_32fc_32f_multiply_32fc_a16_orc_impl
+.source 8 src1
+.source 4 src2
+.dest 8 dst
+.temp 8 both
+mergelq both, src2, src2
+x2 mulf dst, src1, both
diff --git a/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..c5e2e57f1
--- /dev/null
+++ b/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc
@@ -0,0 +1,14 @@
+# Complex float magnitude: squares both components at once, adds the two
+# squares and stores the square root of the sum.
+.function volk_32fc_magnitude_32f_a16_orc_impl
+.source 8 src
+.dest 4 dst
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
+.temp 4 sumf
+
+x2 mulf prodiqf, src, src
+splitql qf, if, prodiqf
+addf sumf, if, qf
+sqrtf dst, sumf
diff --git a/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc
new file mode 100644
index 000000000..6116f5e1f
--- /dev/null
+++ b/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc
@@ -0,0 +1,27 @@
+# Complex float magnitude multiplied by `scalar` and narrowed to 16 bits with
+# signed saturation.
+.function volk_32fc_s32f_magnitude_16i_a16_orc_impl
+.source 8 src
+.dest 2 dst
+.floatparam 4 scalar
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
+.temp 4 sumf
+.temp 4 rootf
+.temp 4 rootl
+.temp 4 maskl
+
+x2 mulf prodiqf, src, src
+splitql qf, if, prodiqf
+addf sumf, if, qf
+sqrtf rootf, sumf
+mulf rootf, rootf, scalar
+# Results above 32768.0 get their sign bit forced on before the conversion.
+# NOTE(review): presumably to mirror the overflow behaviour of another
+# implementation of this kernel -- confirm.
+cmpltf maskl, 32768.0, rootf
+andl maskl, maskl, 0x80000000
+orl rootf, rootf, maskl
+convfl rootl, rootf
+convssslw dst, rootl
diff --git a/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc
new file mode 100644
index 000000000..a27d722cd
--- /dev/null
+++ b/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc
@@ -0,0 +1,24 @@
+# Complex multiply. First pass multiplies matching components and subtracts
+# the two products (real part); second pass repeats the multiply with the
+# halves of src1 swapped and adds the products (imaginary part).
+.function volk_32fc_x2_multiply_32fc_a16_orc_impl
+.source 8 src1
+.source 8 src2
+.dest 8 dst
+.temp 8 prods
+.temp 4 re
+.temp 4 im
+.temp 4 pa
+.temp 4 pb
+.temp 8 flipped
+
+x2 mulf prods, src1, src2
+splitql pb, pa, prods
+subf re, pa, pb
+
+swaplq flipped, src1
+x2 mulf prods, flipped, src2
+splitql pb, pa, prods
+addf im, pa, pb
+
+mergelq dst, re, im
diff --git a/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc
new file mode 100644
index 000000000..7b331f8ed
--- /dev/null
+++ b/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise 32-bit bitwise AND: dst = src1 & src2.
+.function volk_32i_x2_and_32i_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+andl dst, src1, src2
diff --git a/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc
new file mode 100644
index 000000000..4984a9ced
--- /dev/null
+++ b/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc
@@ -0,0 +1,6 @@
+# Element-wise 32-bit bitwise OR: dst = src1 | src2.
+.function volk_32i_x2_or_32i_a16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+orl dst, src1, src2
diff --git a/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc b/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc
new file mode 100644
index 000000000..f44845c88
--- /dev/null
+++ b/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc
@@ -0,0 +1,7 @@
+# Sign-extends every source byte to 16 bits (convsbw) and then shifts the
+# result left by 8 bits in place.
+.function volk_8i_convert_16i_a16_orc_impl
+.source 1 src
+.dest 2 dst
+convsbw dst, src
+shlw dst, dst, 8
diff --git a/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc b/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc
new file mode 100644
index 000000000..8f6e157e9
--- /dev/null
+++ b/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc
@@ -0,0 +1,14 @@
+# Widens each signed byte to 16 then 32 bits, converts it to float and
+# multiplies the result by `scalar`.
+.function volk_8i_s32f_convert_32f_a16_orc_impl
+.source 1 src
+.dest 4 dst
+.floatparam 4 scalar
+.temp 4 valf
+.temp 4 vall
+.temp 2 valw
+
+convsbw valw, src
+convswl vall, valw
+convlf valf, vall
+mulf dst, valf, scalar
diff --git a/volk/volk.pc.in b/volk/volk.pc.in
index a24298856..b03dbdada 100644
--- a/volk/volk.pc.in
+++ b/volk/volk.pc.in
@@ -10,6 +10,6 @@ Name: volk
 Description: VOLK.. Vector Optimized Library of Kernels
 Requires:
 Version: @VERSION@
-Libs: -lvolk -lvolk_runtime
+Libs: -lvolk -lvolk_runtime -lvolk_orc
 Cflags: -I${includedir} ${LV_CXXFLAGS}