diff options
Diffstat (limited to 'volk')
285 files changed, 2241 insertions, 11752 deletions
diff --git a/volk/Makefile.am b/volk/Makefile.am index 4c6951ca7..03c5aac35 100644 --- a/volk/Makefile.am +++ b/volk/Makefile.am @@ -24,7 +24,12 @@ ACLOCAL_AMFLAGS = -I config include $(top_srcdir)/Makefile.common EXTRA_DIST = bootstrap configure config.h.in volk_config.h -SUBDIRS = config include lib +SUBDIRS = config +if LV_HAVE_ORC +SUBDIRS += orc +endif +SUBDIRS += include lib + #if USE_PYTHON #SUBDIRS += python #endif @@ -53,4 +58,6 @@ distclean-local: -rm -f include/Makefile.in -rm -f lib/Makefile.in -rm -f python/Makefile.in - -rm -f configure
\ No newline at end of file + -rm -f configure + -rm -f orc/Makefile.in + -rm -f orc/*.c diff --git a/volk/config.guess b/volk/config.guess index 285237846..405bc3235 100755..120000 --- a/volk/config.guess +++ b/volk/config.guess @@ -1,1505 +1 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 -# Free Software Foundation, Inc. - -timestamp='2010-08-21' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Originally written by Per Bothner. Please send patches (context -# diff format) to <config-patches@gnu.org> and include a ChangeLog -# entry. -# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. 
-# -# You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to <config-patches@gnu.org>." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free -Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -trap 'exit 1' HUP INT TERM - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. - -# Portable tmp directory creation inspired by the Autoconf team. 
- -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" HUP INT PIPE TERM ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ; set_cc_for_build= ;' - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. 
This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". - sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - sh5el) machine=sh5le-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ELF__ - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? - os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in - Debian*) - release='-gnu' - ;; - *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- echo "${machine}-${os}${release}" - exit ;; - *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} - exit ;; - *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit ;; - *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} - exit ;; - macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} - exit ;; - *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit ;; - alpha:OSF1:*:*) - case $UNAME_RELEASE in - *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - ;; - *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` - ;; - esac - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` - case "$ALPHA_CPU_TYPE" in - "EV4 (21064)") - UNAME_MACHINE="alpha" ;; - "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; - "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; - "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; - "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; - "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; - "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; - "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; - "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; - "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; - "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; - "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; - esac - # A Pn.n version is a patched version. - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. 
- # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? - echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit ;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit ;; - *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos - exit ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit ;; - *:z/VM:*:*) - echo s390-ibm-zvmoe - exit ;; - *:OS400:*:*) - echo powerpc-ibm-os400 - exit ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit ;; - arm:riscos:*:*|arm:RISCOS:*:*) - echo arm-unknown-riscos - exit ;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit ;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
- if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit ;; - DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit ;; - DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7; exit ;; - esac ;; - s390x:SunOS:*:*) - echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux${UNAME_RELEASE} - exit ;; - i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - eval $set_cc_for_build - SUN_ARCH="i386" - # If there is a compiler, see if it is configured for 64-bit objects. - # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. - # This test works for both compilers. - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - SUN_ARCH="x86_64" - fi - fi - echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. 
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. - atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit ;; - m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} - exit ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo 
clipper-intergraph-clix${UNAME_RELEASE} - exit ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c -#ifdef __cplusplus -#include <stdio.h> /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && - { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} - exit ;; - Motorola:PowerMAX_OS:*:*) - echo powerpc-motorola-powermax - exit ;; - Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] - then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] - then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else - echo i586-dg-dgux${UNAME_RELEASE} - fi - exit ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit ;; - 
XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i*86:AIX:*:*) - echo i386-ibm-aix - exit ;; - ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <sys/systemcfg.h> - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` - then - echo "$SYSTEM_NAME" - else - echo rs6000-ibm-aix3.2.5 - fi - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit ;; - *:AIX:*:[4567]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit ;; - 
9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac - fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - - #define _HPUX_SOURCE - #include <stdlib.h> - #include <unistd.h> - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if [ ${HP_ARCH} = "hppa2.0w" ] - then - eval $set_cc_for_build - - # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating - # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler - # generating 64-bit code. 
GNU and HP use different nomenclature: - # - # $ CC_FOR_BUILD=cc ./config.guess - # => hppa2.0w-hp-hpux11.23 - # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess - # => hppa64-hp-hpux11.23 - - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | - grep -q __LP64__ - then - HP_ARCH="hppa2.0w" - else - HP_ARCH="hppa64" - fi - fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit ;; - ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} - exit ;; - 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <unistd.h> - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - echo unknown-hitachi-hiuxwe2 - exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit ;; - i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit ;; - 
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit ;; - *:BSD/OS:*:*) - echo 
${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit ;; - *:FreeBSD:*:*) - case ${UNAME_MACHINE} in - pc98) - echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - esac - exit ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit ;; - *:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. - echo ${UNAME_MACHINE}-mingw32 - exit ;; - i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 - exit ;; - *:Interix*:*) - case ${UNAME_MACHINE} in - x86) - echo i586-pc-interix${UNAME_RELEASE} - exit ;; - authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix${UNAME_RELEASE} - exit ;; - IA64) - echo ia64-unknown-interix${UNAME_RELEASE} - exit ;; - esac ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - 8664:Windows_NT:*) - echo x86_64-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? 
- echo i586-pc-interix - exit ;; - i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin - exit ;; - amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-unknown-cygwin - exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - *:GNU:*:*) - # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit ;; - *:GNU/*:*:*) - # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix - exit ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit ;; - arm*:Linux:*:*) - eval $set_cc_for_build - if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_EABI__ - then - echo ${UNAME_MACHINE}-unknown-linux-gnu - else - echo ${UNAME_MACHINE}-unknown-linux-gnueabi - fi - exit ;; - avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit ;; - crisv32:Linux:*:*) - echo crisv32-axis-linux-gnu - exit ;; - frv:Linux:*:*) - echo frv-unknown-linux-gnu - exit ;; - i*86:Linux:*:*) - LIBC=gnu - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" - exit ;; - ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - mips:Linux:*:* | mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef ${UNAME_MACHINE} - #undef ${UNAME_MACHINE}el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=${UNAME_MACHINE}el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=${UNAME_MACHINE} - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - or32:Linux:*:*) - echo or32-unknown-linux-gnu - exit ;; - padre:Linux:*:*) - echo sparc-unknown-linux-gnu - exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) 
echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; - esac - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit ;; - s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux - exit ;; - sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - tile*:Linux:*:*) - echo ${UNAME_MACHINE}-tilera-linux-gnu - exit ;; - vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-gnu - exit ;; - x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu - exit ;; - xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. - # earlier versions are messed up and put the nodename in both - # sysname and nodename. - echo i386-sequent-sysv4 - exit ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx - exit ;; - i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop - exit ;; - i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos - exit ;; - i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable - exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit ;; - i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} - fi - exit ;; - i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. - case `/bin/uname -X | grep "^Machine"` in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` - echo ${UNAME_MACHINE}-pc-isc$UNAME_REL - elif /bin/uname -X 2>/dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. 
- # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that - # this is a cross-build. - echo i586-pc-msdosdjgpp - exit ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. - echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit ;; - mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit ;; - M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit ;; - M68*:*:R3V[5678]*:*) - test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; - NCR*:*:4.2:* | MPRAS*:*:4.2:*) - OS_REL='.3' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; - 
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit ;; - TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit ;; - rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says <Richard.M.Bartel@ccMail.Census.GOV> - echo i586-unisys-sysv4 - exit ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes <hewes@openmarket.com>. - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit ;; - i*86:VOS:*:*) - # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos - exit ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - echo hppa1.1-stratus-vos - exit ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit ;; - news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
- echo i586-pc-beos - exit ;; - BePC:Haiku:*:*) # Haiku running on Intel PC compatible. - echo i586-pc-haiku - exit ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} - exit ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} - exit ;; - SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} - exit ;; - SX-7:SUPER-UX:*:*) - echo sx7-nec-superux${UNAME_RELEASE} - exit ;; - SX-8:SUPER-UX:*:*) - echo sx8-nec-superux${UNAME_RELEASE} - exit ;; - SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux${UNAME_RELEASE} - exit ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit ;; - *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit ;; - *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - case $UNAME_PROCESSOR in - i386) - eval $set_cc_for_build - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - UNAME_PROCESSOR="x86_64" - fi - fi ;; - unknown) UNAME_PROCESSOR=powerpc ;; - esac - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit ;; - *:QNX:*:4*) - echo i386-pc-qnx - exit ;; - NSE-?:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} - exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} - exit ;; - *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit ;; - BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit ;; - DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. 
- if test "$cputype" = "386"; then - UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" - fi - echo ${UNAME_MACHINE}-unknown-plan9 - exit ;; - *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit ;; - *:TENEX:*:*) - echo pdp10-unknown-tenex - exit ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit ;; - *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit ;; - *:ITS:*:*) - echo pdp10-unknown-its - exit ;; - SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit ;; - *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit ;; - *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in - A*) echo alpha-dec-vms ; exit ;; - I*) echo ia64-dec-vms ; exit ;; - V*) echo vax-dec-vms ; exit ;; - esac ;; - *:XENIX:*:SysV) - echo i386-pc-xenix - exit ;; - i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' - exit ;; - i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos - exit ;; - i*86:AROS:*:*) - echo ${UNAME_MACHINE}-pc-aros - exit ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -eval $set_cc_for_build -cat >$dummy.c <<EOF -#ifdef _SEQUENT_ -# include <sys/types.h> -# include <sys/utsname.h> -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include <sys/param.h> - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix\n"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include <sys/param.h> -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - c34*) - echo c34-convex-bsd - exit ;; - c38*) - echo c38-convex-bsd - exit ;; - c4*) - echo c4-convex-bsd - exit ;; - esac -fi - -cat >&2 <<EOF -$0: unable to guess system type - -This script, last modified $timestamp, has failed to recognize -the operating system you are using. It is advised that you -download the most up to date version of the config scripts from - - http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD -and - http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD - -If the version you run ($0) is already up to date, please -send the following data and any information you think might be -pertinent to <config-patches@gnu.org> in order to provide the needed -information to handle your system. 
- -config.guess timestamp = $timestamp - -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` - -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` - -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} -EOF - -exit 1 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: +/usr/share/automake-1.11/config.guess
\ No newline at end of file diff --git a/volk/config.sub b/volk/config.sub index 320e30388..4d47fbcbc 100755..120000 --- a/volk/config.sub +++ b/volk/config.sub @@ -1,1739 +1 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 -# Free Software Foundation, Inc. - -timestamp='2010-09-11' - -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Please send patches to <config-patches@gnu.org>. Submit a context -# diff and a properly formatted GNU ChangeLog entry. -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. 
-# Otherwise, we print the canonical config type on stdout and succeed. - -# You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS - -Canonicalize a configuration name. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to <config-patches@gnu.org>." - -version="\ -GNU config.sub ($timestamp) - -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free -Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." 
- -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" - exit 1 ;; - - *local*) - # First pass through any local machine types. - echo $1 - exit ;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ - storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. 
- ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray | -microblaze) - os= - basic_machine=$1 - ;; - -bluegene*) - os=-cnk - ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 - ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -chorusos*) - os=-chorusos - basic_machine=$1 - ;; - -chorusrdb) - os=-chorusrdb - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco6) - os=-sco5v6 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5v6*) - # Don't forget version if it is 3.2v4 or newer. 
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. - 1750a | 580 \ - | a29k \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ - | bfin \ - | c4x | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | fido | fr30 | frv \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | lm32 \ - | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | mcore | mep | metag \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | 
mipsisa64sr71k | mipsisa64sr71kel \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | moxie \ - | mt \ - | msp430 \ - | nds32 | nds32le | nds32be \ - | nios | nios2 \ - | ns16k | ns32k \ - | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ - | pyramid \ - | rx \ - | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ - | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ - | spu | strongarm \ - | tahoe | thumb | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ - | ubicom32 \ - | v850 | v850e \ - | we32k \ - | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ - | z8k | z80) - basic_machine=$basic_machine-unknown - ;; - c54x) - basic_machine=tic54x-unknown - ;; - c55x) - basic_machine=tic55x-unknown - ;; - c6x) - basic_machine=tic6x-unknown - ;; - m6811 | m68hc11 | m6812 | m68hc12 | picochip) - # Motorola 68HC11/12. - basic_machine=$basic_machine-unknown - os=-none - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) - ;; - ms1) - basic_machine=mt-unknown - ;; - - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* | avr32-* \ - | bfin-* | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | clipper-* | craynv-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ - | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | lm32-* \ - | m32c-* | m32r-* | m32rle-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64octeon-* | mips64octeonel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64r5900-* | mips64r5900el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mips64vr5900-* | mips64vr5900el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipstx39-* | mipstx39el-* \ - | mmix-* \ - | mt-* \ - | msp430-* \ - | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* \ - | none-* | np1-* | ns16k-* | ns32k-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ - | pyramid-* \ - | romp-* | rs6000-* | rx-* \ - | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | 
sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ - | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ - | tahoe-* | thumb-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tile-* | tilegx-* \ - | tron-* \ - | ubicom32-* \ - | v850-* | v850e-* | vax-* \ - | we32k-* \ - | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ - | xstormy16-* | xtensa*-* \ - | ymp-* \ - | z8k-* | z80-*) - ;; - # Recognize the basic CPU types without company name, with glob match. - xtensa*) - basic_machine=$basic_machine-unknown - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. - 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aros) - basic_machine=i386-pc - os=-aros - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - blackfin) - basic_machine=bfin-unknown - os=-linux - ;; - blackfin-*) - basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` - 
os=-linux - ;; - bluegene*) - basic_machine=powerpc-ibm - os=-cnk - ;; - c54x-*) - basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c55x-*) - basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c6x-*) - basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - cegcc) - basic_machine=arm-unknown - os=-cegcc - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - craynv) - basic_machine=craynv-cray - os=-unicosmp - ;; - cr16) - basic_machine=cr16-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - crisv32 | crisv32-* | etraxfs*) - basic_machine=crisv32-axis - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 - ;; - decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dicos) - basic_machine=i686-pc - os=-dicos - ;; - djgpp) - basic_machine=i586-pc - os=-msdosdjgpp - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - 
basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
- i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m68knommu) - basic_machine=m68k-unknown - os=-linux - ;; - m68knommu-*) - basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - microblaze) - basic_machine=microblaze-xilinx - ;; - mingw32) - basic_machine=i386-pc - os=-mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - os=-mingw32ce - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - 
news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; - np1) - basic_machine=np1-gould - ;; - neo-tandem) - basic_machine=neo-tandem - ;; - nse-tandem) - basic_machine=nse-tandem - ;; - nsr-tandem) - basic_machine=nsr-tandem - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - openrisc | openrisc-*) - basic_machine=or32-unknown - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - parisc) - basic_machine=hppa-unknown - os=-linux - ;; - parisc-*) - basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pc98) - basic_machine=i386-pc - ;; - pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo 
$basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc) basic_machine=powerpc-unknown - ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rdos) - basic_machine=i386-pc - os=-rdos - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown - ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown - ;; - sde) - basic_machine=mipsisa32-sde - os=-elf - ;; - sei) - basic_machine=mips-sei - os=-seiux - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sh5el) - basic_machine=sh5le-unknown - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - 
stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - sv1) - basic_machine=sv1-cray - os=-unicos - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - basic_machine=alphaev5-cray - os=-unicos - ;; - t90) - basic_machine=t90-cray - os=-unicos - ;; - # This must be matched before tile*. - tilegx*) - basic_machine=tilegx-unknown - os=-linux-gnu - ;; - tile*) - basic_machine=tile-unknown - os=-linux-gnu - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - toad1) - basic_machine=pdp10-xkl - os=-tops20 - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - tpf) - basic_machine=s390x-ibm - os=-tpf - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xbox) - basic_machine=i686-pc - os=-mingw32 - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - ymp) - basic_machine=ymp-cray - os=-unicos - ;; - 
z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - z80-*-coff) - basic_machine=z80-unknown - os=-sim - ;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. - w89k) - basic_machine=hppa1.1-winbond - ;; - op50n) - basic_machine=hppa1.1-oki - ;; - op60c) - basic_machine=hppa1.1-oki - ;; - romp) - basic_machine=romp-ibm - ;; - mmix) - basic_machine=mmix-knuth - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp10) - # there are many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) - basic_machine=sh-unknown - ;; - sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple - ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple - ;; - *-unknown) - # Make sure to match an already-canonicalized machine name. - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. 
- -auroraux) - os=-auroraux - ;; - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. - # -sysv* is not here because it comes later, after sysvr4. - -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ - | -sym* | -kopensolaris* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -openbsd* | -solidbsd* \ - | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ - | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* | -cegcc* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-android* \ - | -linux-newlib* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ - | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ - | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ - | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ - | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) - # Remember, each alternative MUST END IN *, to match a version number. 
- ;; - -qnx*) - case $basic_machine in - x86-* | i*86-*) - ;; - *) - os=-nto$os - ;; - esac - ;; - -nto-qnx*) - ;; - -nto*) - os=`echo $os | sed -e 's|nto|nto-qnx|'` - ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ - | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) - ;; - -mac*) - os=`echo $os | sed -e 's|mac|macos|'` - ;; - -linux-dietlibc) - os=-linux-dietlibc - ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -opened*) - os=-openedition - ;; - -os400*) - os=-os400 - ;; - -wince*) - os=-wince - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -atheos*) - os=-atheos - ;; - -syllable*) - os=-syllable - ;; - -386bsd) - os=-bsd - ;; - -ctix* | -uts*) - os=-sysv - ;; - -nova*) - os=-rtmk-nova - ;; - -ns2 ) - os=-nextstep2 - ;; - -nsk*) - os=-nsk - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -tpf*) - os=-tpf - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. - -sysv*) - ;; - -ose*) - os=-ose - ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - os=-mint - ;; - -aros*) - os=-aros - ;; - -kaos*) - os=-kaos - ;; - -zvmoe) - os=-zvmoe - ;; - -dicos*) - os=-dicos - ;; - -nacl*) - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. 
-# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. - -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - score-*) - os=-elf - ;; - spu-*) - os=-elf - ;; - *-acorn) - os=-riscix1.2 - ;; - arm*-rebel) - os=-linux - ;; - arm*-semi) - os=-aout - ;; - c4x-* | tic4x-*) - os=-coff - ;; - tic54x-*) - os=-coff - ;; - tic55x-*) - os=-coff - ;; - tic6x-*) - os=-coff - ;; - # This must come before the *-dec entry. - pdp10-*) - os=-tops20 - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - # This also exists in the configure program, but was not the - # default. - # os=-sunos4 - ;; - m68*-cisco) - os=-aout - ;; - mep-*) - os=-elf - ;; - mips*-cisco) - os=-elf - ;; - mips*-*) - os=-elf - ;; - or32-*) - os=-coff - ;; - *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-haiku) - os=-haiku - ;; - *-ibm) - os=-aix - ;; - *-knuth) - os=-mmixware - ;; - *-wec) - os=-proelf - ;; - *-winbond) - os=-proelf - ;; - *-oki) - os=-proelf - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f30[01]-fujitsu | f700-fujitsu) - os=-uxpv - ;; - *-rom68k) - os=-coff - ;; - *-*bug) - os=-coff - ;; - *-apple) - os=-macos - ;; - *-atari*) - os=-mint - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. 
-vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -cnk*|-aix*) - vendor=ibm - ;; - -beos*) - vendor=be - ;; - -hpux*) - vendor=hp - ;; - -mpeix*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs* | -opened*) - vendor=ibm - ;; - -os400*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -tpf*) - vendor=ibm - ;; - -vxsim* | -vxworks* | -windiss*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - -hms*) - vendor=hitachi - ;; - -mpw* | -macos*) - vendor=apple - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - vendor=atari - ;; - -vos*) - vendor=stratus - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os -exit - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: +/usr/share/automake-1.11/config.sub
\ No newline at end of file diff --git a/volk/config/Makefile.am b/volk/config/Makefile.am index 0e556c6e2..27e3f1296 100644 --- a/volk/config/Makefile.am +++ b/volk/config/Makefile.am @@ -27,6 +27,8 @@ m4datadir = $(datadir)/aclocal # List your m4 macros here m4macros = \ acx_pthread.m4 \ + ax_boost_base.m4 \ + ax_boost_unit_test_framework.m4 \ bnv_have_qt.m4 \ cppunit.m4 \ gr_lib64.m4 \ @@ -45,6 +47,7 @@ m4macros = \ mkstemp.m4 \ onceonly.m4 \ pkg.m4 \ + orc.m4 \ gcc_version_workaround.m4 diff --git a/volk/config/ax_boost_base.m4 b/volk/config/ax_boost_base.m4 new file mode 100644 index 000000000..e9790227e --- /dev/null +++ b/volk/config/ax_boost_base.m4 @@ -0,0 +1,334 @@ +# =========================================================================== +# http://autoconf-archive.cryp.to/ax_boost_base.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_BASE([MINIMUM-VERSION]) +# +# DESCRIPTION +# +# Test for the Boost C++ libraries of a particular version (or newer) +# +# If no path to the installed boost library is given the macro searchs +# under /usr, /usr/local, /opt and /opt/local and evaluates the +# $BOOST_ROOT environment variable. Further documentation is available at +# <http://randspringer.de/boost/index.html>. +# +# This macro calls: +# +# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) +# +# And sets: +# +# HAVE_BOOST +# +# LAST MODIFICATION +# +# 2008-04-12 +# +# COPYLEFT +# +# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de> +# Copyright (c) 2008 Free Software Foundation, Inc. +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. 
+ +AC_DEFUN([AX_BOOST_BASE], +[ +AC_REQUIRE([GR_LIB64]) +AC_ARG_WITH([boost], + AS_HELP_STRING([--with-boost@<:@=DIR@:>@], + [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ac_boost_path="" + else + want_boost="yes" + ac_boost_path="$withval" + fi + ], + [want_boost="yes"]) + + +AC_ARG_WITH([boost-libdir], + AS_HELP_STRING([--with-boost-libdir=LIB_DIR], + [Force given directory for boost libraries. Note that this + will overwrite library path detection, so use this parameter + only if default library detection fails and you know exactly + where your boost libraries are located.]), + [ + if test -d $withval + then + ac_boost_lib_path="$withval" + else + AC_MSG_ERROR(--with-boost-libdir expected directory name) + fi + ], + [ac_boost_lib_path=""] +) + +if test "x$want_boost" = "xyes"; then + boost_lib_version_req=ifelse([$1], ,1.20.0,$1) + boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` + boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'` + boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` + boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` + if test "x$boost_lib_version_req_sub_minor" = "x" ; then + boost_lib_version_req_sub_minor="0" + fi + WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` + AC_MSG_CHECKING(for boost >= $boost_lib_version_req) + succeeded=no + + dnl first we check the system location for boost libraries + dnl this location ist chosen if boost libraries are installed with the --layout=system option + dnl or if you install boost with RPM + if test "$ac_boost_path" != ""; then + dnl Look first where we think they ought to be, accounting for a possible "64" 
suffix on lib. + dnl If that directory doesn't exist, fall back to the default behavior + if test -d "$ac_boost_path/lib${gr_libdir_suffix}"; then + BOOST_LDFLAGS="-L$ac_boost_path/lib${gr_libdir_suffix}" + else + BOOST_LDFLAGS="-L$ac_boost_path/lib" + fi + BOOST_CPPFLAGS="-I$ac_boost_path/include" + else + for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then + dnl Look first where we think they ought to be, accounting for a possible "64" suffix on lib. + dnl If that directory doesn't exist, fall back to the default behavior + if test -d "$ac_boost_path_tmp/lib${gr_libdir_suffix}"; then + BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib${gr_libdir_suffix}" + else + BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib" + fi + BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" + break; + fi + done + fi + + dnl overwrite ld flags if we have required special directory with + dnl --with-boost-libdir parameter + if test "$ac_boost_lib_path" != ""; then + BOOST_LDFLAGS="-L$ac_boost_lib_path" + fi + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include <boost/version.hpp> + ]], [[ + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + ]])],[AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ], + []) + AC_LANG_POP([C++]) + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + + + dnl if we found no boost with system layout we search for boost libraries + dnl built and installed without the --layout=system option + if test "$succeeded" != "yes"; then + _version=0 + + if test "$ac_boost_path" != ""; then + path_list="$ac_boost_path" + else + path_list="/usr /usr/local /opt /opt/local" + fi + for ac_boost_path in $path_list ; do + if test 
-d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's,/include/boost-,,; s,_,.,'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + best_path=$ac_boost_path + fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" + + if test "$ac_boost_lib_path" = ""; then + dnl Look first where we think they ought to be, accounting for a possible "64" suffix on lib. + dnl If that directory doesn't exist, fall back to the default behavior + if test -d "$best_path/lib${gr_libdir_suffix}"; then + BOOST_LDFLAGS="-L$best_path/lib${gr_libdir_suffix}" + else + BOOST_LDFLAGS="-L$best_path/lib" + fi + fi + + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include <boost/version.hpp> + ]], [[ + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + ]])],[AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ], + []) + AC_LANG_POP([C++]) + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi + + if test "$succeeded" != "yes" ; then + AC_MSG_RESULT([no]) + if test "$_version" = "0" ; then + AC_MSG_ERROR([[we could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). +If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>.]]) + else + AC_MSG_ERROR([your boost libraries seem to old (version $_version).]) + fi + else + AC_SUBST(BOOST_CPPFLAGS) + AC_SUBST(BOOST_LDFLAGS) + AC_DEFINE(HAVE_BOOST,1,[Define if the Boost headers are available]) + fi +fi +]) + +dnl +dnl Macros used by the boost items that need libraries. +dnl + +dnl $1 is unit name. 
E.g., boost_thread +AC_DEFUN([_AX_BOOST_CHECK_LIB],[ + _AX_BOOST_CHECK_LIB_($1,HAVE_[]m4_toupper($1),m4_toupper($1)_LIB) +]) + +dnl $1 is unit name. E.g., boost_thread +dnl $2 is AC_DEFINE name. E.g., HAVE_BOOST_THREAD +dnl $3 is lib var name. E.g., BOOST_THREAD_LIB +AC_DEFUN([_AX_BOOST_CHECK_LIB_],[ + AC_LANG_PUSH([C++]) + AC_DEFINE($2,1,[Define if the $1 library is available]) + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` + + dnl See if we can find a usable library + link_ok="no" + if test "$ax_boost_user_lib" != ""; then + dnl use what the user supplied + for ax_lib in $ax_boost_user_lib $1-${ax_boost_user_lib}; do + AC_CHECK_LIB($ax_lib, exit, + [$3="-l$ax_lib"; AC_SUBST($3) link_ok="yes"; break]) + done + else + dnl Look in BOOSTLIBDIR for possible candidates + head=$BOOSTLIBDIR/lib[]$1 + for f in ${head}*.so* ${head}*.a* ${head}*.dll* ${head}*.dylib; do + dnl echo 1: $f + case $f in + *\**) continue;; + esac + f=`echo $f | sed -e 's,.*/,,' -e 's,^lib,,'` + dnl echo 2: $f + f=`echo $f | sed -e 's,\($1.*\)\.so.*$,\1,' -e 's,\($1.*\)\.a.*$,\1,' -e 's,\($1.*\)\.dll.*$,\1,' -e 's,\($1.*\)\.dylib.*$,\1,'` + dnl echo 3: $f + + ax_lib=$f + AC_CHECK_LIB($ax_lib, exit, + [$3="-l$ax_lib"; AC_SUBST($3) link_ok="yes"; break]) + done + fi + + if test "$link_ok" != "yes"; then + AC_MSG_ERROR([Could not link against lib[$1]!]) + fi + AC_LANG_POP([C++]) +]) + + +dnl $1 is unit name. E.g., boost_thread +AC_DEFUN([_AX_BOOST_WITH],[ + _AX_BOOST_WITH_($1,m4_bpatsubst($1,_,-)) +]) + +dnl $1 is unit name. E.g., boost_thread +dnl $2 is hyphenated unit name. E.g., boost-thread +AC_DEFUN([_AX_BOOST_WITH_],[ + AC_ARG_WITH([$2], + AC_HELP_STRING([--with-$2@<:@=special-lib@:>@], + [Use the m4_substr($1,6) library from boost. It is possible to specify a certain + library to the linker. 
E.g., --with-$2=$1-gcc41-mt-1_35]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_lib="" + else + want_boost="yes" + ax_boost_user_lib="$withval" + fi + ], + [want_boost="yes"]) +]) + +dnl $1 is unit name. E.g., boost_thread +dnl $2 is AC_LANG_PROGRAM argument 1 +dnl $3 is AC_LANG_PROGRAM argument 2 +dnl $4 is cv variable name. E.g., ax_cv_boost_thread +AC_DEFUN([_AX_BOOST_CHECK_],[ + _AX_BOOST_WITH($1) + if test "$want_boost" = "yes"; then + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([AC_PROG_CXX]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + AC_CACHE_CHECK([whether the boost::m4_substr([$1],6) includes are available], [$4], + [AC_LANG_PUSH([C++]) + AC_COMPILE_IFELSE(AC_LANG_PROGRAM([$2],[$3]),[$4]=yes,[$4]=no) + AC_LANG_POP([C++]) + ]) + if test "$[$4]" = "yes"; then + _AX_BOOST_CHECK_LIB([$1]) + fi + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi +]) + +dnl $1 is unit name. E.g., boost_thread +dnl $2 is AC_LANG_PROGRAM argument 1 +dnl $3 is AC_LANG_PROGRAM argument 2 +AC_DEFUN([_AX_BOOST_CHECK],[ + _AX_BOOST_CHECK_($1,$2,$3,ax_cv_$1) +]) diff --git a/volk/config/ax_boost_unit_test_framework.m4 b/volk/config/ax_boost_unit_test_framework.m4 new file mode 100644 index 000000000..73affccfd --- /dev/null +++ b/volk/config/ax_boost_unit_test_framework.m4 @@ -0,0 +1,36 @@ +# +# SYNOPSIS +# +# AX_BOOST_UNIT_TEST_FRAMEWORK +# +# DESCRIPTION +# +# Test for Unit_Test_Framework library from the Boost C++ libraries. The +# macro requires a preceding call to AX_BOOST_BASE. +# +# This macro calls: +# +# AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB) +# +# And sets: +# +# HAVE_BOOST_UNIT_TEST_FRAMEWORK +# +# COPYLEFT +# +# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de> +# Copyright (c) 2008 Free Software Foundation, Inc. 
+# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. + +AC_DEFUN([AX_BOOST_UNIT_TEST_FRAMEWORK], +[ + AC_REQUIRE([AX_BOOST_BASE]) + _AX_BOOST_CHECK([boost_unit_test_framework], + [@%:@include <boost/test/unit_test.hpp>], + [using boost::unit_test::test_suite; + test_suite* test= BOOST_TEST_SUITE( "Unit test example 1" ); + return 0;]) +]) diff --git a/volk/config/lv_configure.m4 b/volk/config/lv_configure.m4 index c7a5fe960..f98b2dc5b 100644 --- a/volk/config/lv_configure.m4 +++ b/volk/config/lv_configure.m4 @@ -100,6 +100,9 @@ dnl AM_CONDITIONAL([USE_PYTHON], [test "$with_python" = yes]) GR_PWIN32 GR_LIBGNURADIO_CORE_EXTRA_LDFLAGS + dnl Check for liborc + ORC_CHECK + LDFLAGS="$LDFLAGS $LIBGNURADIO_CORE_EXTRA_LDFLAGS" AC_CHECK_PROG([XMLTO],[xmlto],[yes],[]) diff --git a/volk/config/orc.m4 b/volk/config/orc.m4 new file mode 100644 index 000000000..df0f3d6f3 --- /dev/null +++ b/volk/config/orc.m4 @@ -0,0 +1,59 @@ +dnl pkg-config-based checks for Orc + +dnl specific: +dnl ORC_CHECK([REQUIRED_VERSION]) + +AC_DEFUN([ORC_CHECK], +[ + ORC_REQ=ifelse([$1], , "0.4.10", [$1]) + + if test "x$enable_orc" != "xno" ; then + PKG_CHECK_MODULES(ORC, orc-0.4 >= $ORC_REQ, [ + AC_DEFINE(LV_HAVE_ORC, 1, [Use Orc]) + if test "x$ORCC" = "x" ; then + ORCC=`$PKG_CONFIG --variable=orcc orc-0.4` + fi + AC_SUBST(ORCC) + ORCC_FLAGS="--compat $ORC_REQ" + ORC_LDFLAGS=`$PKG_CONFIG --libs orc-0.4` + ORC_CFLAGS=`$PKG_CONFIG --cflags orc-0.4` + AC_SUBST(ORCC_FLAGS) + AC_SUBST(ORC_LDFLAGS) + AC_SUBST(ORC_CFLAGS) + LV_HAVE_ORC=yes + LV_HAVE_ORCC=yes + if test "x$cross_compiling" = "xyes" ; then + LV_HAVE_ORCC=no + fi + ], [ + if test "x$enable_orc" = "xyes" ; then + AC_MSG_ERROR([--enable-orc specified, but Orc >= $ORC_REQ not found]) + fi + AC_DEFINE(DISABLE_ORC, 1, [Disable Orc]) + LV_HAVE_ORC=no + LV_HAVE_ORCC=no + ]) + else + AC_DEFINE(DISABLE_ORC, 1, [Disable Orc]) 
+ LV_HAVE_ORC=no + LV_HAVE_ORCC=no + fi + AM_CONDITIONAL(LV_HAVE_ORC, [test "x$LV_HAVE_ORC" = "xyes"]) + AM_CONDITIONAL(LV_HAVE_ORCC, [test "x$LV_HAVE_ORCC" = "xyes"]) +])) + +AC_DEFUN([ORC_OUTPUT], +[ + if test "$LV_HAVE_ORC" = yes ; then + printf "configure: *** Orc acceleration enabled.\n" + else + if test "x$enable_orc" = "xno" ; then + printf "configure: *** Orc acceleration disabled by --disable-orc.\n" + else + printf "configure: *** Orc acceleration disabled. Requires Orc >= $ORC_REQ, which was\n" + printf " not found.\n" + fi + fi + printf "\n" +]) + diff --git a/volk/configure.ac b/volk/configure.ac index 7cbcbad53..c493adad6 100644 --- a/volk/configure.ac +++ b/volk/configure.ac @@ -18,7 +18,6 @@ dnl AC_INIT AC_PREREQ(2.57) AC_CONFIG_AUX_DIR([.]) -AC_CONFIG_SRCDIR([lib/test_all.cc]) AM_CONFIG_HEADER(config.h) AM_INIT_AUTOMAKE(volk,0.1) @@ -44,7 +43,7 @@ dnl If you need additional boost libraries, you'll need to dnl uncomment AX_BOOST_BASE, plus some of the following: dnl dnl calls AC_SUBST(BOOST_CPPFLAGS), AC_SUBST(BOOST_LDFLAGS) and defines HAVE_BOOST -dnl AX_BOOST_BASE([1.35]) +AX_BOOST_BASE([1.35]) dnl dnl All the rest of these call AC_SUBST(BOOST_<foo>_LIB) and define HAVE_BOOST_<foo> dnl @@ -57,7 +56,7 @@ dnl AX_BOOST_SERIALIZATION dnl AX_BOOST_SIGNALS dnl AX_BOOST_SYSTEM dnl AX_BOOST_TEST_EXEC_MONITOR -dnl AX_BOOST_UNIT_TEST_FRAMEWORK +AX_BOOST_UNIT_TEST_FRAMEWORK dnl AX_BOOST_WSERIALIZATION AC_CONFIG_HEADERS([volk_config.h]) @@ -69,6 +68,7 @@ AC_CONFIG_FILES([\ include/Makefile \ include/volk/Makefile \ lib/Makefile \ + orc/Makefile \ volk.pc \ ]) diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 00289be1e..eb97775b0 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -41,94 +41,93 @@ volkinclude_HEADERS = \ volk.h \ volk_cpu.h \ volk_environment_init.h \ - volk_16s_add_quad_aligned16.h \ - volk_16s_branch_4_state_8_aligned16.h \ - volk_16sc_deinterleave_16s_aligned16.h \ - 
volk_16sc_deinterleave_32f_aligned16.h \ - volk_16sc_deinterleave_real_16s_aligned16.h \ - volk_16sc_deinterleave_real_32f_aligned16.h \ - volk_16sc_deinterleave_real_8s_aligned16.h \ - volk_16sc_magnitude_16s_aligned16.h \ - volk_16sc_magnitude_32f_aligned16.h \ - volk_16s_convert_32f_aligned16.h \ - volk_16s_convert_32f_unaligned16.h \ - volk_16s_convert_8s_aligned16.h \ - volk_16s_convert_8s_unaligned16.h \ - volk_16s_max_star_aligned16.h \ - volk_16s_max_star_horizontal_aligned16.h \ - volk_16s_permute_and_scalar_add_aligned16.h \ - volk_16s_quad_max_star_aligned16.h \ - volk_16u_byteswap_aligned16.h \ - volk_32f_accumulator_aligned16.h \ - volk_32f_add_aligned16.h \ - volk_32fc_32f_multiply_aligned16.h \ - volk_32fc_32f_power_32fc_aligned16.h \ - volk_32f_calc_spectral_noise_floor_aligned16.h \ - volk_32fc_atan2_32f_aligned16.h \ - volk_32fc_conjugate_dot_prod_aligned16.h \ - volk_32fc_conjugate_dot_prod_unaligned.h \ - volk_32fc_deinterleave_32f_aligned16.h \ - volk_32fc_deinterleave_64f_aligned16.h \ - volk_32fc_deinterleave_real_16s_aligned16.h \ - volk_32fc_deinterleave_real_32f_aligned16.h \ - volk_32fc_deinterleave_real_64f_aligned16.h \ - volk_32fc_dot_prod_aligned16.h \ - volk_32fc_index_max_aligned16.h \ - volk_32fc_magnitude_16s_aligned16.h \ - volk_32fc_magnitude_32f_aligned16.h \ - volk_32fc_multiply_aligned16.h \ - volk_32f_convert_16s_aligned16.h \ - volk_32f_convert_16s_unaligned16.h \ - volk_32f_convert_32s_aligned16.h \ - volk_32f_convert_32s_unaligned16.h \ - volk_32f_convert_64f_aligned16.h \ - volk_32f_convert_64f_unaligned16.h \ - volk_32f_convert_8s_aligned16.h \ - volk_32f_convert_8s_unaligned16.h \ - volk_32fc_power_spectral_density_32f_aligned16.h \ - volk_32fc_power_spectrum_32f_aligned16.h \ - volk_32fc_square_dist_aligned16.h \ - volk_32fc_square_dist_scalar_mult_aligned16.h \ - volk_32f_divide_aligned16.h \ - volk_32f_dot_prod_aligned16.h \ - volk_32f_dot_prod_unaligned16.h \ - volk_32f_fm_detect_aligned16.h \ - 
volk_32f_index_max_aligned16.h \ - volk_32f_interleave_16sc_aligned16.h \ - volk_32f_interleave_32fc_aligned16.h \ - volk_32f_max_aligned16.h \ - volk_32f_min_aligned16.h \ - volk_32f_multiply_aligned16.h \ - volk_32f_normalize_aligned16.h \ - volk_32f_power_aligned16.h \ - volk_32f_sqrt_aligned16.h \ - volk_32f_stddev_aligned16.h \ - volk_32f_stddev_and_mean_aligned16.h \ - volk_32f_subtract_aligned16.h \ - volk_32f_sum_of_poly_aligned16.h \ - volk_32s_and_aligned16.h \ - volk_32s_convert_32f_aligned16.h \ - volk_32s_convert_32f_unaligned16.h \ - volk_32s_or_aligned16.h \ - volk_32u_byteswap_aligned16.h \ - volk_32u_popcnt_aligned16.h \ - volk_64f_convert_32f_aligned16.h \ - volk_64f_convert_32f_unaligned16.h \ - volk_64f_max_aligned16.h \ - volk_64f_min_aligned16.h \ - volk_64u_byteswap_aligned16.h \ - volk_64u_popcnt_aligned16.h \ - volk_8sc_deinterleave_16s_aligned16.h \ - volk_8sc_deinterleave_32f_aligned16.h \ - volk_8sc_deinterleave_real_16s_aligned16.h \ - volk_8sc_deinterleave_real_32f_aligned16.h \ - volk_8sc_deinterleave_real_8s_aligned16.h \ - volk_8sc_multiply_conjugate_16sc_aligned16.h \ - volk_8sc_multiply_conjugate_32fc_aligned16.h \ - volk_8s_convert_16s_aligned16.h \ - volk_8s_convert_16s_unaligned16.h \ - volk_8s_convert_32f_aligned16.h \ - volk_8s_convert_32f_unaligned16.h + volk_16i_x5_add_quad_16i_x4_a16.h \ + volk_16i_branch_4_state_8_a16.h \ + volk_16ic_deinterleave_16i_x2_a16.h \ + volk_16ic_s32f_deinterleave_32f_x2_a16.h \ + volk_16ic_deinterleave_real_16i_a16.h \ + volk_16ic_s32f_deinterleave_real_32f_a16.h \ + volk_16ic_deinterleave_real_8i_a16.h \ + volk_16ic_magnitude_16i_a16.h \ + volk_16ic_s32f_magnitude_32f_a16.h \ + volk_16i_s32f_convert_32f_a16.h \ + volk_16i_s32f_convert_32f_u.h \ + volk_16i_convert_8i_a16.h \ + volk_16i_convert_8i_u.h \ + volk_16i_max_star_16i_a16.h \ + volk_16i_max_star_horizontal_16i_a16.h \ + volk_16i_permute_and_scalar_add_a16.h \ + volk_16i_x4_quad_max_star_16i_a16.h \ + volk_16u_byteswap_a16.h \ + 
volk_32f_accumulator_s32f_a16.h \ + volk_32f_x2_add_32f_a16.h \ + volk_32fc_32f_multiply_32fc_a16.h \ + volk_32fc_s32f_power_32fc_a16.h \ + volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h \ + volk_32fc_s32f_atan2_32f_a16.h \ + volk_32fc_x2_conjugate_dot_prod_32fc_a16.h \ + volk_32fc_deinterleave_32f_x2_a16.h \ + volk_32fc_deinterleave_64f_x2_a16.h \ + volk_32fc_s32f_deinterleave_real_16i_a16.h \ + volk_32fc_deinterleave_real_32f_a16.h \ + volk_32fc_deinterleave_real_64f_a16.h \ + volk_32fc_x2_dot_prod_32fc_a16.h \ + volk_32fc_index_max_16u_a16.h \ + volk_32fc_s32f_magnitude_16i_a16.h \ + volk_32fc_magnitude_32f_a16.h \ + volk_32fc_x2_multiply_32fc_a16.h \ + volk_32f_s32f_convert_16i_a16.h \ + volk_32f_s32f_convert_16i_u.h \ + volk_32f_s32f_convert_32i_a16.h \ + volk_32f_s32f_convert_32i_u.h \ + volk_32f_convert_64f_a16.h \ + volk_32f_convert_64f_u.h \ + volk_32f_s32f_convert_8i_a16.h \ + volk_32f_s32f_convert_8i_u.h \ + volk_32fc_s32f_x2_power_spectral_density_32f_a16.h \ + volk_32fc_s32f_power_spectrum_32f_a16.h \ + volk_32fc_x2_square_dist_32f_a16.h \ + volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h \ + volk_32f_x2_divide_32f_a16.h \ + volk_32f_x2_dot_prod_32f_a16.h \ + volk_32f_x2_dot_prod_32f_u.h \ + volk_32f_s32f_32f_fm_detect_32f_a16.h \ + volk_32f_index_max_16u_a16.h \ + volk_32f_x2_s32f_interleave_16ic_a16.h \ + volk_32f_x2_interleave_32fc_a16.h \ + volk_32f_x2_max_32f_a16.h \ + volk_32f_x2_min_32f_a16.h \ + volk_32f_x2_multiply_32f_a16.h \ + volk_32f_s32f_normalize_a16.h \ + volk_32f_s32f_power_32f_a16.h \ + volk_32f_sqrt_32f_a16.h \ + volk_32f_s32f_stddev_32f_a16.h \ + volk_32f_stddev_and_mean_32f_x2_a16.h \ + volk_32f_x2_subtract_32f_a16.h \ + volk_32f_x3_sum_of_poly_32f_a16.h \ + volk_32i_x2_and_32i_a16.h \ + volk_32i_s32f_convert_32f_a16.h \ + volk_32i_s32f_convert_32f_u.h \ + volk_32i_x2_or_32i_a16.h \ + volk_32u_byteswap_a16.h \ + volk_32u_popcnt_a16.h \ + volk_64f_convert_32f_a16.h \ + volk_64f_convert_32f_u.h \ + volk_64f_x2_max_64f_a16.h \ 
+ volk_64f_x2_min_64f_a16.h \ + volk_64u_byteswap_a16.h \ + volk_64u_popcnt_a16.h \ + volk_8ic_deinterleave_16i_x2_a16.h \ + volk_8ic_s32f_deinterleave_32f_x2_a16.h \ + volk_8ic_deinterleave_real_16i_a16.h \ + volk_8ic_s32f_deinterleave_real_32f_a16.h \ + volk_8ic_deinterleave_real_8i_a16.h \ + volk_8ic_x2_multiply_conjugate_16ic_a16.h \ + volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h \ + volk_8i_convert_16i_a16.h \ + volk_8i_convert_16i_u.h \ + volk_8i_s32f_convert_32f_a16.h \ + volk_8i_s32f_convert_32f_u.h VOLK_MKTABLES_SOURCES = \ $(platform_CODE) \ diff --git a/volk/include/volk/archs.xml b/volk/include/volk/archs.xml index b7c98500f..a19a5add9 100644 --- a/volk/include/volk/archs.xml +++ b/volk/include/volk/archs.xml @@ -5,6 +5,12 @@ <flag>none</flag> </arch> +<arch name="orc" type="all"> + <flag>lorc-0.4</flag> + <overrule>LV_HAVE_ORC</overrule> + <overrule_val>no</overrule_val> +</arch> + <arch name="altivec" type="powerpc"> <flag>maltivec</flag> </arch> diff --git a/volk/include/volk/make_c.py b/volk/include/volk/make_c.py index f2432d7a4..6e75067d0 100644 --- a/volk/include/volk/make_c.py +++ b/volk/include/volk/make_c.py @@ -25,7 +25,6 @@ def make_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : tempstring = tempstring + " return 0;\n" tempstring = tempstring + "}\n" - for i in range(len(funclist)): tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py index 275d3869f..c74b0464d 100644 --- a/volk/include/volk/make_set_simd.py +++ b/volk/include/volk/make_set_simd.py @@ -95,7 +95,7 @@ def make_set_simd(dom) : arch = str(domarch.attributes["name"].value); tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; tempstring = tempstring + " ADDONS=\"\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"generic\"\n"; + tempstring = tempstring + 
" BUILT_ARCHS=\"\"\n"; tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; @@ -165,8 +165,22 @@ def make_set_simd(dom) : tempstring = tempstring + " indCXX=no\n" tempstring = tempstring + " indLV_ARCH=no\n" elif atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " ;;\n" tempstring = tempstring + " (powerpc)\n" @@ -210,14 +224,49 @@ def make_set_simd(dom) : tempstring = tempstring + " indCXX=no\n" tempstring = tempstring + " indLV_ARCH=no\n" elif atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == 
\"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; + tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " ;;\n" + tempstring = tempstring + " (*)\n" + for domarch in dom: + arch = str(domarch.attributes["name"].value); + atype = str(domarch.attributes["type"].value); + flag = domarch.getElementsByTagName("flag"); + flag = str(flag[0].firstChild.data); + if atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" tempstring = tempstring + " ;;\n" tempstring = tempstring + " esac\n" tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" tempstring = tempstring + "])\n" 
return tempstring; - - + diff --git a/volk/include/volk/volk_16s_branch_4_state_8_aligned16.h b/volk/include/volk/volk_16i_branch_4_state_8_a16.h index fb9d7cb87..3437c1a6b 100644 --- a/volk/include/volk/volk_16s_branch_4_state_8_aligned16.h +++ b/volk/include/volk/volk_16i_branch_4_state_8_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H -#define INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H +#ifndef INCLUDED_volk_16i_branch_4_state_8_a16_H +#define INCLUDED_volk_16i_branch_4_state_8_a16_H #include<inttypes.h> @@ -14,7 +14,7 @@ #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16s_branch_4_state_8_aligned16_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11; @@ -138,7 +138,7 @@ static inline void volk_16s_branch_4_state_8_aligned16_ssse3(short* target, sh #endif /*LV_HAVE_SSEs*/ #if LV_HAVE_GENERIC -static inline void volk_16s_branch_4_state_8_aligned16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { +static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { int i = 0; int bound = 4; @@ -191,4 +191,4 @@ static inline void volk_16s_branch_4_state_8_aligned16_generic(short* target, #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_BRANCH_4_STATE_8_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_branch_4_state_8_a16_H*/ diff --git a/volk/include/volk/volk_16s_convert_8s_aligned16.h b/volk/include/volk/volk_16i_convert_8i_a16.h index 64c368688..73e45ad63 100644 --- a/volk/include/volk/volk_16s_convert_8s_aligned16.h +++ b/volk/include/volk/volk_16i_convert_8i_a16.h @@ -1,5 +1,5 @@ -#ifndef 
INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H +#ifndef INCLUDED_volk_16i_convert_8i_a16_H +#define INCLUDED_volk_16i_convert_8i_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_8s_aligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -52,7 +52,7 @@ static inline void volk_16s_convert_8s_aligned16_sse2(int8_t* outputVector, cons \param outputVector The 8 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_8s_aligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_a16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -66,4 +66,4 @@ static inline void volk_16s_convert_8s_aligned16_generic(int8_t* outputVector, c -#endif /* INCLUDED_VOLK_16s_CONVERT_8s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16i_convert_8i_a16_H */ diff --git a/volk/include/volk/volk_16s_convert_8s_unaligned16.h b/volk/include/volk/volk_16i_convert_8i_u.h index ca925de86..5fc792b56 100644 --- a/volk/include/volk/volk_16s_convert_8s_unaligned16.h +++ b/volk/include/volk/volk_16i_convert_8i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H +#ifndef INCLUDED_volk_16i_convert_8i_u_H +#define INCLUDED_volk_16i_convert_8i_u_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param 
num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -54,7 +54,7 @@ static inline void volk_16s_convert_8s_unaligned16_sse2(int8_t* outputVector, co \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ +static inline void volk_16i_convert_8i_u_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -68,4 +68,4 @@ static inline void volk_16s_convert_8s_unaligned16_generic(int8_t* outputVector, -#endif /* INCLUDED_VOLK_16s_CONVERT_8s_UNALIGNED16_H */ +#endif /* INCLUDED_volk_16i_convert_8i_u_H */ diff --git a/volk/include/volk/volk_16s_max_star_aligned16.h b/volk/include/volk/volk_16i_max_star_16i_a16.h index ba4e979ec..ff57bd2a1 100644 --- a/volk/include/volk/volk_16s_max_star_aligned16.h +++ b/volk/include/volk/volk_16i_max_star_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H -#define INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H +#ifndef INCLUDED_volk_16i_max_star_16i_a16_H +#define INCLUDED_volk_16i_max_star_16i_a16_H #include<inttypes.h> @@ -12,7 +12,7 @@ #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16s_max_star_aligned16_ssse3(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, unsigned int 
num_bytes) { @@ -87,7 +87,7 @@ static inline void volk_16s_max_star_aligned16_ssse3(short* target, short* src0 #if LV_HAVE_GENERIC -static inline void volk_16s_max_star_aligned16_generic(short* target, short* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) { int i = 0; @@ -105,4 +105,4 @@ static inline void volk_16s_max_star_aligned16_generic(short* target, short* src #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_MAX_STAR_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_max_star_16i_a16_H*/ diff --git a/volk/include/volk/volk_16s_max_star_horizontal_aligned16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h index 82d011677..695e08dbf 100644 --- a/volk/include/volk/volk_16s_max_star_horizontal_aligned16.h +++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H -#define INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H +#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a16_H +#define INCLUDED_volk_16i_max_star_horizontal_16i_a16_H #include<inttypes.h> @@ -12,7 +12,7 @@ #include<emmintrin.h> #include<tmmintrin.h> -static inline void volk_16s_max_star_horizontal_aligned16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { +static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, int16_t* src0, unsigned int num_bytes) { const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d}; @@ -110,7 +110,7 @@ static inline void volk_16s_max_star_horizontal_aligned16_ssse3(int16_t* target #if LV_HAVE_GENERIC -static inline void volk_16s_max_star_horizontal_aligned16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { +static inline void 
volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { int i = 0; @@ -127,4 +127,4 @@ static inline void volk_16s_max_star_horizontal_aligned16_generic(int16_t* targe #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_MAX_STAR_HORIZONTAL_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a16_H*/ diff --git a/volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h index 452d05c4f..e52a949fb 100644 --- a/volk/include/volk/volk_16s_permute_and_scalar_add_aligned16.h +++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H -#define INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H +#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a16_H +#define INCLUDED_volk_16i_permute_and_scalar_add_a16_H #include<inttypes.h> @@ -13,7 +13,7 @@ #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16s_permute_and_scalar_add_aligned16_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -117,7 +117,7 @@ static inline void volk_16s_permute_and_scalar_add_aligned16_sse2(short* target #if LV_HAVE_GENERIC -static inline void volk_16s_permute_and_scalar_add_aligned16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { +static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* 
scalars, unsigned int num_bytes) { int i = 0; @@ -136,4 +136,4 @@ static inline void volk_16s_permute_and_scalar_add_aligned16_generic(short* targ #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a16_H*/ diff --git a/volk/include/volk/volk_16s_convert_32f_aligned16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h index 126ce1528..83fd26ff9 100644 --- a/volk/include/volk/volk_16s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_16i_s32f_convert_32f_a16_H +#define INCLUDED_volk_16i_s32f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_32f_aligned16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -68,7 +68,7 @@ static inline void volk_16s_convert_32f_aligned16_sse4_1(float* outputVector, co \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_32f_aligned16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -102,7 +102,7 @@ static inline 
void volk_16s_convert_32f_aligned16_sse(float* outputVector, const \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_16s_convert_32f_aligned16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_a16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -116,4 +116,4 @@ static inline void volk_16s_convert_32f_aligned16_generic(float* outputVector, c -#endif /* INCLUDED_VOLK_16s_CONVERT_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_16i_s32f_convert_32f_a16_H */ diff --git a/volk/include/volk/volk_16s_convert_32f_unaligned16.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h index d6212fba5..8f0dd0083 100644 --- a/volk/include/volk/volk_16s_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H +#define INCLUDED_volk_16i_s32f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -15,7 +15,7 @@ \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_16s_convert_32f_unaligned16_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -70,7 +70,7 @@ static inline void volk_16s_convert_32f_unaligned16_sse4_1(float* outputVector, \param num_points The number of 
data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_16s_convert_32f_unaligned16_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -105,7 +105,7 @@ static inline void volk_16s_convert_32f_unaligned16_sse(float* outputVector, con \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_16s_convert_32f_unaligned16_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_16i_s32f_convert_32f_u_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int16_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -119,4 +119,4 @@ static inline void volk_16s_convert_32f_unaligned16_generic(float* outputVector, -#endif /* INCLUDED_VOLK_16s_CONVERT_32f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_16i_s32f_convert_32f_u_H */ diff --git a/volk/include/volk/volk_16s_quad_max_star_aligned16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h index 1004c4d23..e4ec5ab4e 100644 --- a/volk/include/volk/volk_16s_quad_max_star_aligned16.h +++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H -#define INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H +#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H +#define INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H #include<inttypes.h> @@ -13,7 +13,7 @@ #include<emmintrin.h> -static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* src0, short* src1, short* src2, short* 
src3, unsigned int num_bytes) { +static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { @@ -96,9 +96,9 @@ static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* /*asm volatile ( - "volk_16s_quad_max_star_aligned16_sse2_L1:\n\t" + "volk_16i_x4_quad_max_star_16i_a16_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je volk_16s_quad_max_star_aligned16_sse2_END\n\t" + "je volk_16i_x4_quad_max_star_16i_a16_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" @@ -143,9 +143,9 @@ static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* "movaps %%xmm1, (%[target])\n\t" "addw $16, %[target]\n\t" - "jmp volk_16s_quad_max_star_aligned16_sse2_L1\n\t" + "jmp volk_16i_x4_quad_max_star_16i_a16_sse2_L1\n\t" - "volk_16s_quad_max_star_aligned16_sse2_END:\n\t" + "volk_16i_x4_quad_max_star_16i_a16_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [target]"r"(target) : @@ -168,7 +168,7 @@ static inline void volk_16s_quad_max_star_aligned16_sse2(short* target, short* #if LV_HAVE_GENERIC -static inline void volk_16s_quad_max_star_aligned16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { +static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { int i = 0; @@ -188,4 +188,4 @@ static inline void volk_16s_quad_max_star_aligned16_generic(short* target, short #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_16s_QUAD_MAX_STAR_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a16_H*/ diff --git a/volk/include/volk/volk_16s_add_quad_aligned16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h index 63042bef1..5744ca3a6 100644 --- a/volk/include/volk/volk_16s_add_quad_aligned16.h +++ 
b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H -#define INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H +#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H +#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H #include<inttypes.h> @@ -13,7 +13,7 @@ #include<xmmintrin.h> #include<emmintrin.h> -static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4; __m128i *p_target0, *p_target1, *p_target2, *p_target3, *p_src0, *p_src1, *p_src2, *p_src3, *p_src4; @@ -65,9 +65,9 @@ static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ } /*asm volatile ( - ".%=volk_16s_add_quad_aligned16_sse2_L1:\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1:\n\t" "cmp $0, %[bound]\n\t" - "je .%=volk_16s_add_quad_aligned16_sse2_END\n\t" + "je .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END\n\t" "movaps (%[src0]), %%xmm1\n\t" "movaps (%[src1]), %%xmm2\n\t" "movaps (%[src2]), %%xmm3\n\t" @@ -91,8 +91,8 @@ static inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ "add $16, %[target1]\n\t" "add $16, %[target2]\n\t" "add $16, %[target3]\n\t" - "jmp .%=volk_16s_add_quad_aligned16_sse2_L1\n\t" - ".%=volk_16s_add_quad_aligned16_sse2_END:\n\t" + "jmp .%=volk_16i_x5_add_quad_16i_x4_a16_sse2_L1\n\t" + ".%=volk_16i_x5_add_quad_16i_x4_a16_sse2_END:\n\t" : :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3) :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5" @@ -113,7 +113,7 @@ static 
inline void volk_16s_add_quad_aligned16_sse2(short* target0, short* targ #if LV_HAVE_GENERIC -static inline void volk_16s_add_quad_aligned16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { +static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { int i = 0; @@ -133,4 +133,4 @@ static inline void volk_16s_add_quad_aligned16_generic(short* target0, short* ta -#endif /*INCLUDED_VOLK_16s_ADD_QUAD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a16_H*/ diff --git a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h index 32e13df98..7e08bf182 100644 --- a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h +++ b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H +#define INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_16s_aligned16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -61,7 +61,7 @@ static inline void volk_16sc_deinterleave_16s_aligned16_ssse3(int16_t* iBuffer, \param qBuffer The Q buffer output data 
\param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_16s_aligned16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -128,7 +128,7 @@ static inline void volk_16sc_deinterleave_16s_aligned16_sse2(int16_t* iBuffer, i \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Deinterleaves the complex 16 bit vector into I & Q vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param qBuffer The Q buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +extern void volk_16ic_deinterleave_16i_x2_a16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_16i_x2_a16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_16i_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h index b594c85b8..388c00592 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_16s_aligned16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a16_H +#define INCLUDED_volk_16ic_deinterleave_real_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_16s_aligned16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -55,7 +55,7 @@ static inline void 
volk_16sc_deinterleave_real_16s_aligned16_ssse3(int16_t* iBuf \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_16s_aligned16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -103,7 +103,7 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_sse2(int16_t* iBuff \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (int16_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -117,4 +117,4 @@ static inline void volk_16sc_deinterleave_real_16s_aligned16_generic(int16_t* iB -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h index c0d1e941a..55a25702e 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a16_H +#define INCLUDED_volk_16ic_deinterleave_real_8i_a16_H #include <inttypes.h> #include <stdio.h> 
@@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -53,7 +53,7 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffe number = sixteenthPoints * 16; int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr; for(; number < num_points; number++){ - *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ / 256)); + *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8)); int16ComplexVectorPtr++; } } @@ -66,18 +66,29 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffe \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; - const int16_t* complexVectorPtr = (int16_t*)complexVector; + int16_t* complexVectorPtr = (int16_t*)complexVector; int8_t* iBufferPtr = iBuffer; for(number = 0; number < num_points; number++){ - *iBufferPtr++ = (int8_t)(*complexVectorPtr++ / 256); + *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8)); complexVectorPtr++; } } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +extern void volk_16ic_deinterleave_real_8i_a16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16ic_deinterleave_real_8i_a16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_deinterleave_real_8i_a16_orc_impl(iBuffer, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16ic_magnitude_16i_a16.h index 1482ab82e..bdcace750 100644 --- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_16ic_magnitude_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H -#define INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H +#define INCLUDED_volk_16ic_magnitude_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -84,7 +84,7 @@ static inline void volk_16sc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVect \param magnitudeVector The vector containing the real output values \param 
num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_16s_aligned16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -160,11 +160,11 @@ static inline void volk_16sc_magnitude_16s_aligned16_sse(int16_t* magnitudeVecto \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ +static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; - const float scalar = 32786.0; + const float scalar = 32768.0; for(number = 0; number < num_points; number++){ float real = ((float)(*complexVectorPtr++)) / scalar; float imag = ((float)(*complexVectorPtr++)) / scalar; @@ -173,7 +173,18 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC_DISABLED +/*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_16ic_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); +static inline void volk_16ic_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16ic_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_magnitude_16i_a16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h index 86f67437d..606de2fc5 100644 --- a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H +#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* 
iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -77,7 +77,7 @@ static inline void volk_16sc_deinterleave_32f_aligned16_sse(float* iBuffer, floa \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! + \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param qBuffer The Q buffer output data + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex data values to be deinterleaved + */ +extern void volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H */ diff 
--git a/volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h index 3e7be1e64..62331e496 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_32f_aligned16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H -#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H +#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -61,7 +61,7 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuff \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_16sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -107,7 +107,7 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to 
be deinterleaved */ -static inline void volk_16sc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* iBufferPtr = iBuffer; @@ -122,4 +122,4 @@ static inline void volk_16sc_deinterleave_real_32f_aligned16_generic(float* iBuf -#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h index 9c2a48835..ae64efbeb 100644 --- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H -#define INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H +#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H +#define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -79,7 +79,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse3(float* magnitudeVector \param scalar The data value 
to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -148,7 +148,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_sse(float* magnitudeVector, \param scalar The data value to be divided against each input data value of the input complex vector \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ const int16_t* complexVectorPtr = (const int16_t*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -161,7 +161,19 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC_DISABLED +/*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_16ic_s32f_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16ic_s32f_magnitude_32f_a16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16ic_s32f_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H */ diff --git a/volk/include/volk/volk_16u_byteswap_aligned16.h b/volk/include/volk/volk_16u_byteswap_a16.h index 698e958e4..c8128dbab 100644 --- a/volk/include/volk/volk_16u_byteswap_aligned16.h +++ b/volk/include/volk/volk_16u_byteswap_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H -#define INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H +#ifndef INCLUDED_volk_16u_byteswap_a16_H +#define INCLUDED_volk_16u_byteswap_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_aligned16_sse2(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint16_t* inputPtr = intsToSwap; __m128i input, left, right, output; @@ -49,7 +49,7 @@ static inline void volk_16u_byteswap_aligned16_sse2(uint16_t* intsToSwap, 
unsign \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, unsigned int num_points){ +static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned int num_points){ unsigned int point; uint16_t* inputPtr = intsToSwap; for(point = 0; point < num_points; point++){ @@ -61,5 +61,17 @@ static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, uns } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Byteswaps (in-place) an aligned vector of int16_t's. + \param intsToSwap The vector of data to byte swap + \param numDataPoints The number of data points +*/ +extern void volk_16u_byteswap_a16_orc_impl(uint16_t* intsToSwap, unsigned int num_points); +static inline void volk_16u_byteswap_a16_orc(uint16_t* intsToSwap, unsigned int num_points){ + volk_16u_byteswap_a16_orc_impl(intsToSwap, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H */ +#endif /* INCLUDED_volk_16u_byteswap_a16_H */ diff --git a/volk/include/volk/volk_32f_accumulator_aligned16.h b/volk/include/volk/volk_32f_accumulator_s32f_a16.h index 7e395cf50..4a3588e6d 100644 --- a/volk/include/volk/volk_32f_accumulator_aligned16.h +++ b/volk/include/volk/volk_32f_accumulator_s32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H -#define INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H +#ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H +#define INCLUDED_volk_32f_accumulator_s32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_aligned16_sse(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* inputBuffer, unsigned 
int num_points){ float returnValue = 0; unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -49,7 +49,7 @@ static inline void volk_32f_accumulator_aligned16_sse(float* result, const float \param inputBuffer The buffer of data to be accumulated \param num_points The number of values in inputBuffer to be accumulated */ -static inline void volk_32f_accumulator_aligned16_generic(float* result, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_accumulator_s32f_a16_generic(float* result, const float* inputBuffer, unsigned int num_points){ const float* aPtr = inputBuffer; unsigned int number = 0; float returnValue = 0; @@ -64,4 +64,4 @@ static inline void volk_32f_accumulator_aligned16_generic(float* result, const f -#endif /* INCLUDED_VOLK_32f_ACCUMULATOR_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_accumulator_s32f_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_aligned16.h b/volk/include/volk/volk_32f_convert_64f_a16.h index 91a855813..c303dc118 100644 --- a/volk/include/volk/volk_32f_convert_64f_aligned16.h +++ b/volk/include/volk/volk_32f_convert_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H +#ifndef INCLUDED_volk_32f_convert_64f_a16_H +#define INCLUDED_volk_32f_convert_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_aligned16_sse2(double* outputVector, con \param fVector The float 
vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_a16_generic(double* outputVector, const float* inputVector, unsigned int num_points){ double* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_aligned16_generic(double* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_64f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_convert_64f_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_64f_unaligned16.h b/volk/include/volk/volk_32f_convert_64f_u.h index 698e0d446..a825767de 100644 --- a/volk/include/volk/volk_32f_convert_64f_unaligned16.h +++ b/volk/include/volk/volk_32f_convert_64f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_convert_64f_u_H +#define INCLUDED_volk_32f_convert_64f_u_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_unaligned16_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_64f_unaligned16_sse2(double* outputVector, c \param fVector The float vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_32f_convert_64f_unaligned16_generic(double* outputVector, const float* 
inputVector, unsigned int num_points){ +static inline void volk_32f_convert_64f_u_generic(double* outputVector, const float* inputVector, unsigned int num_points){ double* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -67,4 +67,4 @@ static inline void volk_32f_convert_64f_unaligned16_generic(double* outputVector -#endif /* INCLUDED_VOLK_32f_CONVERT_64f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32f_convert_64f_u_H */ diff --git a/volk/include/volk/volk_32f_index_max_aligned16.h b/volk/include/volk/volk_32f_index_max_16u_a16.h index 26322bfa2..d070e17d5 100644 --- a/volk/include/volk/volk_32f_index_max_aligned16.h +++ b/volk/include/volk/volk_32f_index_max_16u_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H -#define INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_32f_index_max_16u_a16_H +#define INCLUDED_volk_32f_index_max_16u_a16_H #include <volk/volk_common.h> #include <inttypes.h> @@ -8,7 +8,7 @@ #if LV_HAVE_SSE4_1 #include<smmintrin.h> -static inline void volk_32f_index_max_aligned16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -66,7 +66,7 @@ static inline void volk_32f_index_max_aligned16_sse4_1(unsigned int* target, con #if LV_HAVE_SSE #include<xmmintrin.h> -static inline void volk_32f_index_max_aligned16_sse(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -123,7 +123,7 @@ static inline void volk_32f_index_max_aligned16_sse(unsigned int* target, const #endif /*LV_HAVE_SSE*/ #if 
LV_HAVE_GENERIC -static inline void volk_32f_index_max_aligned16_generic(unsigned int* target, const float* src0, unsigned int num_points) { +static inline void volk_32f_index_max_16u_a16_generic(unsigned int* target, const float* src0, unsigned int num_points) { if(num_points > 0){ float max = src0[0]; unsigned int index = 0; @@ -145,4 +145,4 @@ static inline void volk_32f_index_max_aligned16_generic(unsigned int* target, co #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32F_INDEX_MAX_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_index_max_16u_a16_H*/ diff --git a/volk/include/volk/volk_32f_fm_detect_aligned16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h index c82239d74..ff4d5b19c 100644 --- a/volk/include/volk/volk_32f_fm_detect_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H -#define INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H +#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. \param num_noints The number of real values in the input vector. */ -static inline void volk_32f_fm_detect_aligned16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -87,7 +87,7 @@ static inline void volk_32f_fm_detect_aligned16_sse(float* outputVector, const f \param saveValue A pointer to a float which contains the phase value of the sample before the first input sample. \param num_points The number of real values in the input vector. 
*/ -static inline void volk_32f_fm_detect_aligned16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ +static inline void volk_32f_s32f_32f_fm_detect_32f_a16_generic(float* outputVector, const float* inputVector, const float bound, float* saveValue, unsigned int num_points){ if (num_points < 1) { return; } @@ -117,4 +117,4 @@ static inline void volk_32f_fm_detect_aligned16_generic(float* outputVector, con -#endif /* INCLUDED_VOLK_32f_FM_DETECT_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h index ff917525f..168245d65 100644 --- a/volk/include/volk/volk_32f_calc_spectral_noise_floor_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H -#define INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H +#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -16,7 +16,7 @@ \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_calc_spectral_noise_floor_aligned16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -127,7 +127,7 @@ static 
inline void volk_32f_calc_spectral_noise_floor_aligned16_sse(float* noise \param spectralExclusionValue The number of dB above the noise floor that a data point must be to be excluded from the noise floor calculation - default value is 20 \param noiseFloorAmplitude The noise floor of the input spectrum, in dB */ -static inline void volk_32f_calc_spectral_noise_floor_aligned16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ +static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_generic(float* noiseFloorAmplitude, const float* realDataPoints, const float spectralExclusionValue, const unsigned int num_points){ float sumMean = 0.0; unsigned int number; // find the sum (for mean), etc @@ -164,4 +164,4 @@ static inline void volk_32f_calc_spectral_noise_floor_aligned16_generic(float* n -#endif /* INCLUDED_VOLK_32f_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_16s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h index 7fbabd9c3..d6b16e336 100644 --- a/volk/include/volk/volk_32f_convert_16s_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H +#define INCLUDED_volk_32f_s32f_convert_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_16s_aligned16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int 
num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -53,7 +53,7 @@ static inline void volk_32f_convert_16s_aligned16_sse2(int16_t* outputVector, co \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_16s_aligned16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -93,7 +93,7 @@ static inline void volk_32f_convert_16s_aligned16_sse(int16_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_16s_aligned16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_a16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -107,4 +107,4 @@ static inline void volk_32f_convert_16s_aligned16_generic(int16_t* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_16i_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_16s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index d2bbdf13a..4d306e53c 100644 --- a/volk/include/volk/volk_32f_convert_16s_unaligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H 
+#define INCLUDED_volk_32f_s32f_convert_16i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_16s_unaligned16_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; @@ -55,7 +55,7 @@ static inline void volk_32f_convert_16s_unaligned16_sse2(int16_t* outputVector, \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_16s_unaligned16_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -96,7 +96,7 @@ static inline void volk_32f_convert_16s_unaligned16_sse(int16_t* outputVector, c \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_16s_unaligned16_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -110,4 +110,4 @@ static inline void volk_32f_convert_16s_unaligned16_generic(int16_t* outputVecto -#endif /* INCLUDED_VOLK_32f_CONVERT_16s_UNALIGNED16_H */ +#endif /* 
INCLUDED_volk_32f_s32f_convert_16i_u_H */ diff --git a/volk/include/volk/volk_32f_convert_32s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h index 011ef5d0e..ae874fd7b 100644 --- a/volk/include/volk/volk_32f_convert_32s_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H +#define INCLUDED_volk_32f_s32f_convert_32i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_32s_aligned16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -49,7 +49,7 @@ static inline void volk_32f_convert_32s_aligned16_sse2(int32_t* outputVector, co \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_32s_aligned16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -89,7 +89,7 @@ static inline void volk_32f_convert_32s_aligned16_sse(int32_t* outputVector, con \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void 
volk_32f_convert_32s_aligned16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_a16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -103,4 +103,4 @@ static inline void volk_32f_convert_32s_aligned16_generic(int32_t* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_32s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_32i_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_32s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index a6df826c7..561fcd800 100644 --- a/volk/include/volk/volk_32f_convert_32s_unaligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H +#define INCLUDED_volk_32f_s32f_convert_32i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_32s_unaligned16_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_convert_32s_unaligned16_sse2(int32_t* outputVector, \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_32s_unaligned16_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int 
num_points){ +static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -92,7 +92,7 @@ static inline void volk_32f_convert_32s_unaligned16_sse(int32_t* outputVector, c \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_32s_unaligned16_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int32_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -106,4 +106,4 @@ static inline void volk_32f_convert_32s_unaligned16_generic(int32_t* outputVecto -#endif /* INCLUDED_VOLK_32f_CONVERT_32s_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */ diff --git a/volk/include/volk/volk_32f_convert_8s_aligned16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h index b9487b622..f64f2a213 100644 --- a/volk/include/volk/volk_32f_convert_8s_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H +#define INCLUDED_volk_32f_s32f_convert_8i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_8s_aligned16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const float* 
inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -60,7 +60,7 @@ static inline void volk_32f_convert_8s_aligned16_sse2(int8_t* outputVector, cons \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_8s_aligned16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -100,13 +100,13 @@ static inline void volk_32f_convert_8s_aligned16_sse(int8_t* outputVector, const \param scalar The value multiplied against each point in the input buffer \param num_points The number of data values to be converted */ -static inline void volk_32f_convert_8s_aligned16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_a16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int8_t* outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; for(number = 0; number < num_points; number++){ - *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ * scalar)); + *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ * scalar); } } #endif /* LV_HAVE_GENERIC */ @@ -114,4 +114,4 @@ static inline void volk_32f_convert_8s_aligned16_generic(int8_t* outputVector, c -#endif /* INCLUDED_VOLK_32f_CONVERT_8s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_8i_a16_H */ diff --git a/volk/include/volk/volk_32f_convert_8s_unaligned16.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index e986dbc87..420693571 100644 --- 
a/volk/include/volk/volk_32f_convert_8s_unaligned16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H -#define INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H +#define INCLUDED_volk_32f_s32f_convert_8i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_8s_unaligned16_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -62,7 +62,7 @@ static inline void volk_32f_convert_8s_unaligned16_sse2(int8_t* outputVector, co \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_8s_unaligned16_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -103,7 +103,7 @@ static inline void volk_32f_convert_8s_unaligned16_sse(int8_t* outputVector, con \param num_points The number of data values to be converted \note Input buffer does NOT need to be properly aligned */ -static inline void volk_32f_convert_8s_unaligned16_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ int8_t* 
outputVectorPtr = outputVector; const float* inputVectorPtr = inputVector; unsigned int number = 0; @@ -117,4 +117,4 @@ static inline void volk_32f_convert_8s_unaligned16_generic(int8_t* outputVector, -#endif /* INCLUDED_VOLK_32f_CONVERT_8s_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */ diff --git a/volk/include/volk/volk_32f_normalize_aligned16.h b/volk/include/volk/volk_32f_s32f_normalize_a16.h index 1aabb1d9d..0850cddf7 100644 --- a/volk/include/volk/volk_32f_normalize_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_normalize_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H -#define INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_normalize_a16_H +#define INCLUDED_volk_32f_s32f_normalize_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param num_points The number of values in vecBuffer \param scalar The scale value to be applied to each buffer value */ -static inline void volk_32f_normalize_aligned16_sse(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; @@ -49,7 +49,7 @@ static inline void volk_32f_normalize_aligned16_sse(float* vecBuffer, const floa \param bVector One of the vectors to be normalizeed \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector */ -static inline void volk_32f_normalize_aligned16_generic(float* vecBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; float* inputPtr = vecBuffer; const float invScalar = 1.0 / scalar; @@ -60,7 +60,22 @@ static inline void volk_32f_normalize_aligned16_generic(float* vecBuffer, const } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Normalizes the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be normalizeed + \param bVector One of the vectors to be normalizeed + \param num_points The number of values in aVector and bVector to be normalizeed together and stored into cVector +*/ +extern void volk_32f_s32f_normalize_a16_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points); +static inline void volk_32f_s32f_normalize_a16_orc(float* vecBuffer, const float scalar, unsigned int num_points){ + float invscalar = 1.0 / scalar; + volk_32f_s32f_normalize_a16_orc_impl(vecBuffer, vecBuffer, invscalar, num_points); +} +#endif /* LV_HAVE_GENERIC */ + -#endif /* INCLUDED_VOLK_32f_NORMALIZE_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_normalize_a16_H */ diff --git a/volk/include/volk/volk_32f_power_aligned16.h b/volk/include/volk/volk_32f_s32f_power_32f_a16.h index 2ecd8eecb..3ed594d9a 100644 --- a/volk/include/volk/volk_32f_power_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_power_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_POWER_ALIGNED16_H -#define INCLUDED_VOLK_32f_POWER_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_power_32f_a16_H +#define INCLUDED_volk_32f_s32f_power_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -19,7 +19,7 @@ \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_power_aligned16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -76,7 +76,7 @@ static inline void volk_32f_power_aligned16_sse4_1(float* cVector, const 
float* \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_power_aligned16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -127,7 +127,7 @@ static inline void volk_32f_power_aligned16_sse(float* cVector, const float* aVe \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32f_power_aligned16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ +static inline void volk_32f_s32f_power_32f_a16_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -141,4 +141,4 @@ static inline void volk_32f_power_aligned16_generic(float* cVector, const float* -#endif /* INCLUDED_VOLK_32f_POWER_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_power_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_stddev_aligned16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h index 1c6a08437..32f4fa067 100644 --- a/volk/include/volk/volk_32f_stddev_aligned16.h +++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H -#define INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H +#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H +#define INCLUDED_volk_32f_s32f_stddev_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the 
stddev calculation */ -static inline void volk_32f_stddev_aligned16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -74,7 +74,7 @@ static inline void volk_32f_stddev_aligned16_sse4_1(float* stddev, const float* \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_stddev_aligned16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ unsigned int number = 0; @@ -119,7 +119,7 @@ static inline void volk_32f_stddev_aligned16_sse(float* stddev, const float* inp \param mean The mean of the input buffer \param num_points The number of values in input buffer to used in the stddev calculation */ -static inline void volk_32f_stddev_aligned16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ +static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ float returnValue = 0; if(num_points > 0){ const float* aPtr = inputBuffer; @@ -141,4 +141,4 @@ static inline void volk_32f_stddev_aligned16_generic(float* stddev, const float* -#endif /* INCLUDED_VOLK_32f_STDDEV_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_sqrt_aligned16.h b/volk/include/volk/volk_32f_sqrt_32f_a16.h index 0b2eaf251..513c2cffe 100644 --- a/volk/include/volk/volk_32f_sqrt_aligned16.h +++ b/volk/include/volk/volk_32f_sqrt_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef 
INCLUDED_VOLK_32f_SQRT_ALIGNED16_H -#define INCLUDED_VOLK_32f_SQRT_ALIGNED16_H +#ifndef INCLUDED_volk_32f_sqrt_32f_a16_H +#define INCLUDED_volk_32f_sqrt_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_aligned16_sse(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -47,7 +47,7 @@ static inline void volk_32f_sqrt_aligned16_sse(float* cVector, const float* aVec \param aVector One of the vectors to be sqrted \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector */ -static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float* aVector, unsigned int num_points){ +static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; unsigned int number = 0; @@ -58,7 +58,20 @@ static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int); +/*! 
+ \brief Sqrts the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be sqrted + \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector +*/ +static inline void volk_32f_sqrt_32f_a16_orc(float* cVector, const float* aVector, unsigned int num_points){ + volk_32f_sqrt_32f_a16_orc_impl(cVector, aVector, num_points); +} + +#endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_VOLK_32f_SQRT_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_sqrt_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h index 1cd502257..278089841 100644 --- a/volk/include/volk/volk_32f_stddev_and_mean_aligned16.h +++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H -#define INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H +#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H +#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -87,7 +87,7 @@ static inline void volk_32f_stddev_and_mean_aligned16_sse4_1(float* stddev, floa \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static 
inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -142,7 +142,7 @@ static inline void volk_32f_stddev_and_mean_aligned16_sse(float* stddev, float* \param inputBuffer The buffer of points to calculate the std deviation for \param num_points The number of values in input buffer to used in the stddev and mean calculations */ -static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ +static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ float returnValue = 0; float newMean = 0; if(num_points > 0){ @@ -166,4 +166,4 @@ static inline void volk_32f_stddev_and_mean_aligned16_generic(float* stddev, flo -#endif /* INCLUDED_VOLK_32f_STDDEV_AND_MEAN_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */ diff --git a/volk/include/volk/volk_32f_add_aligned16.h b/volk/include/volk/volk_32f_x2_add_32f_a16.h index 721c60fd6..d0d0e0a0e 100644 --- a/volk/include/volk/volk_32f_add_aligned16.h +++ b/volk/include/volk/volk_32f_x2_add_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_ADD_ALIGNED16_H -#define INCLUDED_VOLK_32f_ADD_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_add_32f_a16_H +#define INCLUDED_volk_32f_x2_add_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_add_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void 
volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_add_aligned16_sse(float* cVector, const float* aVect \param bVector One of the vectors to be added \param num_points The number of values in aVector and bVector to be added together and stored into cVector */ -static inline void volk_32f_add_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32f_add_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +extern void volk_32f_x2_add_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_add_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_add_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32f_ADD_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_add_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_divide_aligned16.h b/volk/include/volk/volk_32f_x2_divide_32f_a16.h index c00700cd8..d844e25b0 100644 --- a/volk/include/volk/volk_32f_divide_aligned16.h +++ 
b/volk/include/volk/volk_32f_x2_divide_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H -#define INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_divide_32f_a16_H +#define INCLUDED_volk_32f_x2_divide_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aV \param bVector The divisor vector \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector */ -static inline void volk_32f_divide_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,20 @@ static inline void volk_32f_divide_aligned16_generic(float* cVector, const float } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Divides the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector The vector to be divideed + \param bVector The divisor vector + \param num_points The number of values in aVector and bVector to be divideed together and stored into cVector +*/ +extern void volk_32f_x2_divide_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_divide_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_divide_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_divide_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_dot_prod_aligned16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h index 3aee1136a..61aa56815 100644 --- a/volk/include/volk/volk_32f_dot_prod_aligned16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H -#define INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H +#define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H #include<stdio.h> @@ -7,7 +7,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32f_dot_prod_aligned16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { float dotProduct = 0; const float* aPtr = input; @@ -27,7 +27,7 @@ static inline void volk_32f_dot_prod_aligned16_generic(float * result, const flo #if LV_HAVE_SSE -static inline void volk_32f_dot_prod_aligned16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const 
float* input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -77,7 +77,7 @@ static inline void volk_32f_dot_prod_aligned16_sse( float* result, const float* #include <pmmintrin.h> -static inline void volk_32f_dot_prod_aligned16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -124,7 +124,7 @@ static inline void volk_32f_dot_prod_aligned16_sse3(float * result, const float #include <smmintrin.h> -static inline void volk_32f_dot_prod_aligned16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -181,4 +181,4 @@ static inline void volk_32f_dot_prod_aligned16_sse4_1(float * result, const floa #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_VOLK_32f_DOT_PROD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a16_H*/ diff --git a/volk/include/volk/volk_32f_dot_prod_unaligned16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h index bce6aa15f..8469a3cea 100644 --- a/volk/include/volk/volk_32f_dot_prod_unaligned16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H -#define INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_u_H +#define INCLUDED_volk_32f_x2_dot_prod_32f_u_H #include<stdio.h> @@ -7,7 +7,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32f_dot_prod_unaligned16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void 
volk_32f_x2_dot_prod_32f_u_generic(float * result, const float * input, const float * taps, unsigned int num_points) { float dotProduct = 0; const float* aPtr = input; @@ -27,7 +27,7 @@ static inline void volk_32f_dot_prod_unaligned16_generic(float * result, const f #if LV_HAVE_SSE -static inline void volk_32f_dot_prod_unaligned16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -77,7 +77,7 @@ static inline void volk_32f_dot_prod_unaligned16_sse( float* result, const floa #include <pmmintrin.h> -static inline void volk_32f_dot_prod_unaligned16_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * input, const float * taps, unsigned int num_points) { unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -124,7 +124,7 @@ static inline void volk_32f_dot_prod_unaligned16_sse3(float * result, const floa #include <smmintrin.h> -static inline void volk_32f_dot_prod_unaligned16_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { +static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float * input, const float* taps, unsigned int num_points) { unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -181,4 +181,4 @@ static inline void volk_32f_dot_prod_unaligned16_sse4_1(float * result, const fl #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_VOLK_32f_DOT_PROD_UNALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_u_H*/ diff --git a/volk/include/volk/volk_32f_interleave_32fc_aligned16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h index 859c6a0ef..29c9392df 100644 --- 
a/volk/include/volk/volk_32f_interleave_32fc_aligned16.h +++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H -#define INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a16_H +#define INCLUDED_volk_32f_x2_interleave_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_32fc_aligned16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ unsigned int number = 0; float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; @@ -56,7 +56,7 @@ static inline void volk_32f_interleave_32fc_aligned16_sse(lv_32fc_t* complexVect \param complexVector The complex output vector \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_32fc_aligned16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ +static inline void volk_32f_x2_interleave_32fc_a16_generic(lv_32fc_t* complexVector, const float* iBuffer, const float* qBuffer, unsigned int num_points){ float* complexVectorPtr = (float*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32f_interleave_32fc_aligned16_generic(lv_32fc_t* complex -#endif /* INCLUDED_VOLK_32f_INTERLEAVE_32FC_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a16_H */ diff --git a/volk/include/volk/volk_32f_max_aligned16.h b/volk/include/volk/volk_32f_x2_max_32f_a16.h index 96aafb2bf..26e7f1246 100644 --- a/volk/include/volk/volk_32f_max_aligned16.h +++ 
b/volk/include/volk/volk_32f_x2_max_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_MAX_ALIGNED16_H -#define INCLUDED_VOLK_32f_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_max_32f_a16_H +#define INCLUDED_volk_32f_x2_max_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVect \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_max_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The vector to be checked + \param bVector The vector to be checked + \param num_points The number of values in aVector and bVector to be checked and stored into cVector +*/ +extern void volk_32f_x2_max_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_max_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_max_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_max_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_min_aligned16.h b/volk/include/volk/volk_32f_x2_min_32f_a16.h index e247f4213..23bae044c 100644 --- a/volk/include/volk/volk_32f_min_aligned16.h +++ b/volk/include/volk/volk_32f_x2_min_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_MIN_ALIGNED16_H -#define INCLUDED_VOLK_32f_MIN_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_min_32f_a16_H +#define INCLUDED_volk_32f_x2_min_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_32f_min_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -53,7 +53,7 @@ static inline void volk_32f_min_aligned16_sse(float* cVector, const float* aVect \param bVector The vector to be checked \param num_points The number of values in aVector and 
bVector to be checked and stored into cVector */ -static inline void volk_32f_min_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The vector to be checked + \param bVector The vector to be checked + \param num_points The number of values in aVector and bVector to be checked and stored into cVector +*/ +extern void volk_32f_x2_min_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_min_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_min_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_min_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_multiply_aligned16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h index b557580ab..a0dcfa86e 100644 --- a/volk/include/volk/volk_32f_multiply_aligned16.h +++ b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H -#define INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_multiply_32f_a16_H +#define INCLUDED_volk_32f_x2_multiply_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of 
values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_multiply_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_multiply_aligned16_sse(float* cVector, const float* \param bVector One of the vectors to be multiplied \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32f_multiply_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32f_multiply_aligned16_generic(float* cVector, const flo } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Multiplies the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +extern void volk_32f_x2_multiply_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_multiply_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_multiply_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_multiply_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h index 476946b88..30306774d 100644 --- a/volk/include/volk/volk_32f_interleave_16sc_aligned16.h +++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H -#define INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H +#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = 
iBuffer; const float* qBufferPtr = qBuffer; @@ -72,7 +72,7 @@ static inline void volk_32f_interleave_16sc_aligned16_sse2(lv_16sc_t* complexVec \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ unsigned int number = 0; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -136,7 +136,7 @@ static inline void volk_32f_interleave_16sc_aligned16_sse(lv_16sc_t* complexVect \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be interleaved */ -static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ +static inline void volk_32f_x2_s32f_interleave_16ic_a16_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ int16_t* complexVectorPtr = (int16_t*)complexVector; const float* iBufferPtr = iBuffer; const float* qBufferPtr = qBuffer; @@ -152,4 +152,4 @@ static inline void volk_32f_interleave_16sc_aligned16_generic(lv_16sc_t* complex -#endif /* INCLUDED_VOLK_32f_INTERLEAVE_16SC_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H */ diff --git a/volk/include/volk/volk_32f_subtract_aligned16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h index ac3f5e5d1..7404bfe79 100644 --- a/volk/include/volk/volk_32f_subtract_aligned16.h +++ b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef 
INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H -#define INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x2_subtract_32f_a16_H +#define INCLUDED_volk_32f_x2_subtract_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_subtract_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32f_subtract_aligned16_sse(float* cVector, const float* \param bVector The vector to be subtracted \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector */ -static inline void volk_32f_subtract_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ float* cPtr = cVector; const float* aPtr = aVector; const float* bPtr= bVector; @@ -63,5 +63,19 @@ static inline void volk_32f_subtract_aligned16_generic(float* cVector, const flo } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Subtracts bVector from aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The initial vector + \param bVector The vector to be subtracted + \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector +*/ +extern void volk_32f_x2_subtract_32f_a16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_x2_subtract_32f_a16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_x2_subtract_32f_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + -#endif /* INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H */ +#endif /* INCLUDED_volk_32f_x2_subtract_32f_a16_H */ diff --git a/volk/include/volk/volk_32f_sum_of_poly_aligned16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h index a326e62b1..af9e39537 100644 --- a/volk/include/volk/volk_32f_sum_of_poly_aligned16.h +++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H -#define INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H +#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H +#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H #include<inttypes.h> #include<stdio.h> @@ -13,7 +13,7 @@ #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32f_sum_of_poly_aligned16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { float result = 0.0; @@ -100,7 +100,7 @@ static inline void volk_32f_sum_of_poly_aligned16_sse3(float* target, float* src #if LV_HAVE_GENERIC -static inline void volk_32f_sum_of_poly_aligned16_generic(float* target, float* src0, float* center_point_array, float* cutoff, 
unsigned int num_bytes) { +static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { @@ -148,4 +148,4 @@ static inline void volk_32f_sum_of_poly_aligned16_generic(float* target, float* #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32F_SUM_OF_POLY_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a16_H*/ diff --git a/volk/include/volk/volk_32fc_32f_multiply_aligned16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h index 436656ca0..514998800 100644 --- a/volk/include/volk/volk_32fc_32f_multiply_aligned16.h +++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H -#define INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a16_H +#define INCLUDED_volk_32fc_32f_multiply_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vectors containing the float values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_aligned16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -64,7 +64,7 @@ static inline void volk_32fc_32f_multiply_aligned16_sse(lv_32fc_t* cVector, cons \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_32f_multiply_aligned16_generic(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ +static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const float* bPtr= bVector; @@ -76,7 +76,20 @@ static inline void volk_32fc_32f_multiply_aligned16_generic(lv_32fc_t* cVector, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! + \brief Multiplies the input complex vector with the input float vector and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector The complex vector to be multiplied + \param bVector The vector containing the float values to be multiplied against each complex value in aVector + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector + */ +extern void volk_32fc_32f_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32fc_32f_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){ + volk_32fc_32f_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_VOLK_32fc_32f_MULTIPLY_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h index 02085cd1e..84d2576ed 100644 --- a/volk/include/volk/volk_32fc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H +#define INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H #include 
<inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -57,7 +57,7 @@ static inline void volk_32fc_deinterleave_32f_aligned16_sse(float* iBuffer, floa \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -72,4 +72,4 @@ static inline void volk_32fc_deinterleave_32f_aligned16_generic(float* iBuffer, -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h index 3d9ebccdd..34262a7af 100644 --- a/volk/include/volk/volk_32fc_deinterleave_64f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H +#define INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ 
-13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_aligned16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -59,7 +59,7 @@ static inline void volk_32fc_deinterleave_64f_aligned16_sse2(double* iBuffer, do \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_64f_aligned16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_64f_x2_a16_generic(double* iBuffer, double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -75,4 +75,4 @@ static inline void volk_32fc_deinterleave_64f_aligned16_generic(double* iBuffer, -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_64F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h index 2af973bcc..9838ec88b 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a16_H +#define INCLUDED_volk_32fc_deinterleave_real_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I 
buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_real_32f_aligned16_sse(float* iBuffer, \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; float* iBufferPtr = iBuffer; @@ -65,4 +65,4 @@ static inline void volk_32fc_deinterleave_real_32f_aligned16_generic(float* iBuf -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h index f408589c4..af392d074 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_64f_aligned16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a16_H +#define INCLUDED_volk_32fc_deinterleave_real_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ 
-static inline void volk_32fc_deinterleave_real_64f_aligned16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; @@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_real_64f_aligned16_sse2(double* iBuffe \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_64f_aligned16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_deinterleave_real_64f_a16_generic(double* iBuffer, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const float* complexVectorPtr = (float*)complexVector; double* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_32fc_deinterleave_real_64f_aligned16_generic(double* iBu -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a16_H */ diff --git a/volk/include/volk/volk_32fc_index_max_aligned16.h b/volk/include/volk/volk_32fc_index_max_16u_a16.h index d77a95f90..532ae4e7c 100644 --- a/volk/include/volk/volk_32fc_index_max_aligned16.h +++ b/volk/include/volk/volk_32fc_index_max_16u_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H -#define INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_index_max_16u_a16_H +#define INCLUDED_volk_32fc_index_max_16u_a16_H #include <volk/volk_common.h> #include<inttypes.h> @@ -11,7 +11,7 @@ #include<pmmintrin.h> -static inline void volk_32fc_index_max_aligned16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { @@ 
-189,7 +189,7 @@ static inline void volk_32fc_index_max_aligned16_sse3(unsigned int* target, lv_3 #endif /*LV_HAVE_SSE3*/ #if LV_HAVE_GENERIC -static inline void volk_32fc_index_max_aligned16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { +static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { float sq_dist = 0.0; float max = 0.0; unsigned int index = 0; @@ -212,4 +212,4 @@ static inline void volk_32fc_index_max_aligned16_generic(unsigned int* target, l #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32FC_INDEX_MAX_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_index_max_16u_a16_H*/ diff --git a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h b/volk/include/volk/volk_32fc_magnitude_32f_a16.h index 7a8fd1ef9..be7216dce 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H -#define INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_magnitude_32f_a16_H +#define INCLUDED_volk_32fc_magnitude_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_aligned16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -56,7 +56,7 @@ static inline void volk_32fc_magnitude_32f_aligned16_sse3(float* magnitudeVector \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated 
and stored into cVector */ -static inline void volk_32fc_magnitude_32f_aligned16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -103,7 +103,7 @@ static inline void volk_32fc_magnitude_32f_aligned16_sse(float* magnitudeVector, \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ +static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -115,7 +115,18 @@ static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +extern void volk_32fc_magnitude_32f_a16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); +static inline void volk_32fc_magnitude_32f_a16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + volk_32fc_magnitude_32f_a16_orc_impl(magnitudeVector, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_magnitude_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_atan2_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h index df0ebb987..e9f74438d 100644 --- a/volk/include/volk/volk_32fc_atan2_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H -#define INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a16_H +#define INCLUDED_volk_32fc_s32f_atan2_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -19,7 +19,7 @@ \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. 
*/ -static inline void volk_32fc_atan2_32f_aligned16_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -81,7 +81,7 @@ static inline void volk_32fc_atan2_32f_aligned16_sse4_1(float* outputVector, co \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. */ -static inline void volk_32fc_atan2_32f_aligned16_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; float* outPtr = outputVector; @@ -139,7 +139,7 @@ static inline void volk_32fc_atan2_32f_aligned16_sse(float* outputVector, const \param normalizeFactor The atan2 results will be divided by this normalization factor. \param num_points The number of complex values in the input vector. 
*/ -static inline void volk_32fc_atan2_32f_aligned16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_atan2_32f_a16_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){ float* outPtr = outputVector; const float* inPtr = (float*)inputVector; const float invNormalizeFactor = 1.0 / normalizeFactor; @@ -155,4 +155,4 @@ static inline void volk_32fc_atan2_32f_aligned16_generic(float* outputVector, co -#endif /* INCLUDED_VOLK_32fc_ATAN2_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h index 3026b2422..31465bff9 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_16s_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H +#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_16s_aligned16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -62,7 +62,7 @@ static inline void volk_32fc_deinterleave_real_16s_aligned16_sse(int16_t* iBuffe \param iBuffer The I buffer output data \param num_points The number of complex data 
values to be deinterleaved */ -static inline void volk_32fc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* iBufferPtr = iBuffer; unsigned int number = 0; @@ -77,4 +77,4 @@ static inline void volk_32fc_deinterleave_real_16s_aligned16_generic(int16_t* iB -#endif /* INCLUDED_VOLK_32fc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h index 4e64d8c22..530359600 100644 --- a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H -#define INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H +#define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -69,7 +69,7 @@ static inline void volk_32fc_magnitude_16s_aligned16_sse3(int16_t* magnitudeVect \param magnitudeVector The vector containing the real 
output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_16s_aligned16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -128,7 +128,7 @@ static inline void volk_32fc_magnitude_16s_aligned16_sse(int16_t* magnitudeVecto \param magnitudeVector The vector containing the real output values \param num_points The number of complex values in complexVector to be calculated and stored into cVector */ -static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ const float* complexVectorPtr = (float*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; @@ -140,7 +140,19 @@ static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param scalar The scale value multiplied to the magnitude of each complex vector + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_32fc_s32f_magnitude_16i_a16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_32fc_s32f_magnitude_16i_a16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ + volk_32fc_s32f_magnitude_16i_a16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H */ diff --git a/volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h index 2d71ee4f8..3507fdb3c 100644 --- a/volk/include/volk/volk_32fc_32f_power_32fc_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H -#define INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a16_H +#define INCLUDED_volk_32fc_s32f_power_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -18,7 +18,7 @@ \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_32f_power_32fc_aligned16_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float power, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -89,7 +89,7 @@ static inline void volk_32fc_32f_power_32fc_aligned16_sse(lv_32fc_t* cVector, co \param power The power value to be applied to each data point \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector */ -static inline void volk_32fc_32f_power_32fc_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ +static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float power, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; unsigned int number = 0; @@ -106,4 +106,4 @@ static inline void volk_32fc_32f_power_32fc_aligned16_generic(lv_32fc_t* cVector -#endif /* INCLUDED_VOLK_32fc_32f_POWER_32fc_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a16_H */ diff --git a/volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h index 645629b9d..39d8f7aa2 100644 --- a/volk/include/volk/volk_32fc_power_spectrum_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H +#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -19,7 +19,7 @@ \param normalizationFactor This value is divided against all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_power_spectrum_32f_aligned16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void 
volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; @@ -96,7 +96,7 @@ static inline void volk_32fc_power_spectrum_32f_aligned16_sse3(float* logPowerOu \param normalizationFactor This value is divided agains all the input values before the power is calculated \param num_points The number of fft data points */ -static inline void volk_32fc_power_spectrum_32f_aligned16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ +static inline void volk_32fc_s32f_power_spectrum_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -123,4 +123,4 @@ static inline void volk_32fc_power_spectrum_32f_aligned16_generic(float* logPowe -#endif /* INCLUDED_VOLK_32fc_POWER_SPECTRUM_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h index 52ec0f95b..0120b5307 100644 --- a/volk/include/volk/volk_32fc_power_spectral_density_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H -#define INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H +#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -20,7 +20,7 @@ \param rbw The resolution bandwith of the fft spectrum \param 
num_points The number of fft data points */ -static inline void volk_32fc_power_spectral_density_32f_aligned16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ const float* inputPtr = (const float*)complexFFTInput; float* destPtr = logPowerOutput; uint64_t number = 0; @@ -103,7 +103,7 @@ static inline void volk_32fc_power_spectral_density_32f_aligned16_sse3(float* lo \param rbw The resolution bandwith of the fft spectrum \param num_points The number of fft data points */ -static inline void volk_32fc_power_spectral_density_32f_aligned16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ +static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points){ // Calculate the Power of the complex point const float* inputPtr = (float*)complexFFTInput; float* realFFTDataPointsPtr = logPowerOutput; @@ -131,4 +131,4 @@ static inline void volk_32fc_power_spectral_density_32f_aligned16_generic(float* -#endif /* INCLUDED_VOLK_32fc_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a16_H */ diff --git a/volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h index 60103c1b5..a01971df3 100644 --- a/volk/include/volk/volk_32fc_conjugate_dot_prod_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H -#define 
INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H +#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H #include<volk/volk_complex.h> #include<stdio.h> @@ -8,7 +8,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32fc_conjugate_dot_prod_aligned16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -62,7 +62,7 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_generic(lv_32fc_t* res #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_conjugate_dot_prod_aligned16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -203,7 +203,7 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_sse(lv_32fc_t* result, #endif #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_conjugate_dot_prod_aligned16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -341,4 +341,4 @@ static inline void volk_32fc_conjugate_dot_prod_aligned16_sse_32(lv_32fc_t* resu -#endif /*INCLUDED_VOLK_32fc_CONJUGATE_DOT_PROD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H*/ diff 
--git a/volk/include/volk/volk_32fc_dot_prod_aligned16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h index 1a834dc25..9a7b65ab4 100644 --- a/volk/include/volk/volk_32fc_dot_prod_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H -#define INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H +#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H #include <volk/volk_complex.h> #include <stdio.h> @@ -9,7 +9,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32fc_dot_prod_aligned16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { float * res = (float*) result; float * in = (float*) input; @@ -59,7 +59,7 @@ static inline void volk_32fc_dot_prod_aligned16_generic(lv_32fc_t* result, const #if LV_HAVE_SSE && LV_HAVE_64 -static inline void volk_32fc_dot_prod_aligned16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm @@ -194,7 +194,7 @@ static inline void volk_32fc_dot_prod_aligned16_sse_64(lv_32fc_t* result, const #if LV_HAVE_SSE && LV_HAVE_32 -static inline void volk_32fc_dot_prod_aligned16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { asm volatile ( @@ -320,7 +320,7 @@ static inline void volk_32fc_dot_prod_aligned16_sse_32(lv_32fc_t* result, const #include <pmmintrin.h> -static inline void volk_32fc_dot_prod_aligned16_sse3(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { lv_32fc_t dotProduct; @@ -377,8 +377,8 @@ static inline void volk_32fc_dot_prod_aligned16_sse3(lv_32fc_t* result, const lv #include <smmintrin.h> -static inline void volk_32fc_dot_prod_aligned16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - volk_32fc_dot_prod_aligned16_sse3(result, input, taps, num_bytes); +static inline void volk_32fc_x2_dot_prod_32fc_a16_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { + volk_32fc_x2_dot_prod_32fc_a16_sse3(result, input, taps, num_bytes); // SSE3 version runs twice as fast as the SSE4.1 version, so turning off SSE4 version for now /* __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1; @@ -465,4 +465,4 @@ static inline void volk_32fc_dot_prod_aligned16_sse4_1(lv_32fc_t* result, const #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_VOLK_32fc_DOT_PROD_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H*/ diff --git a/volk/include/volk/volk_32fc_multiply_aligned16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h index 6a1649fdb..b4214f5d2 100644 --- a/volk/include/volk/volk_32fc_multiply_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h @@ -1,9 +1,10 @@ -#ifndef INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H -#define INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a16_H +#define INCLUDED_volk_32fc_x2_multiply_32fc_a16_H #include <inttypes.h> #include <stdio.h> #include <volk/volk_complex.h> +#include <float.h> #if LV_HAVE_SSE3 #include <pmmintrin.h> @@ -14,7 +15,7 @@ \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into 
cVector */ -static inline void volk_32fc_multiply_aligned16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -60,7 +61,7 @@ static inline void volk_32fc_multiply_aligned16_sse3(lv_32fc_t* cVector, const l \param bVector One of the vectors to be multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_32fc_multiply_aligned16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ +static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; const lv_32fc_t* bPtr= bVector; @@ -72,7 +73,22 @@ static inline void volk_32fc_multiply_aligned16_generic(lv_32fc_t* cVector, cons } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +extern void volk_32fc_x2_multiply_32fc_a16_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); +static inline void volk_32fc_x2_multiply_32fc_a16_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + volk_32fc_x2_multiply_32fc_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + + -#endif /* INCLUDED_VOLK_32fc_MULTIPLY_ALIGNED16_H */ +#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a16_H */ diff --git a/volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h index 0fcc86f1e..6a863b16d 100644 --- a/volk/include/volk/volk_32fc_square_dist_scalar_mult_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H -#define INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H +#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H #include<inttypes.h> #include<stdio.h> @@ -10,7 +10,7 @@ #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_square_dist_scalar_mult_aligned16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, 
xmm6, xmm7, xmm8; @@ -106,7 +106,7 @@ static inline void volk_32fc_square_dist_scalar_mult_aligned16_sse3(float* targe #endif /*LV_HAVE_SSE3*/ #if LV_HAVE_GENERIC -static inline void volk_32fc_square_dist_scalar_mult_aligned16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { +static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -123,4 +123,4 @@ static inline void volk_32fc_square_dist_scalar_mult_aligned16_generic(float* ta #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_H*/ diff --git a/volk/include/volk/volk_32fc_square_dist_aligned16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h index 6458ea4dd..406097fc8 100644 --- a/volk/include/volk/volk_32fc_square_dist_aligned16.h +++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H -#define INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H +#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a16_H +#define INCLUDED_volk_32fc_x2_square_dist_32f_a16_H #include<inttypes.h> #include<stdio.h> @@ -9,7 +9,7 @@ #include<xmmintrin.h> #include<pmmintrin.h> -static inline void volk_32fc_square_dist_aligned16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { +static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -92,7 +92,7 @@ static inline void volk_32fc_square_dist_aligned16_sse3(float* target, lv_32fc_t #endif /*LV_HAVE_SSE3*/ #if LV_HAVE_GENERIC -static inline void volk_32fc_square_dist_aligned16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { 
+static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; int i = 0; @@ -109,4 +109,4 @@ static inline void volk_32fc_square_dist_aligned16_generic(float* target, lv_32f #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_VOLK_32FC_SQUARE_DIST_ALIGNED16_H*/ +#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a16_H*/ diff --git a/volk/include/volk/volk_32s_convert_32f_aligned16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h index a407e68bd..0fcadd9cb 100644 --- a/volk/include/volk/volk_32s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_32i_s32f_convert_32f_a16_H +#define INCLUDED_volk_32i_s32f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32s_convert_32f_aligned16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -55,7 +55,7 @@ static inline void volk_32s_convert_32f_aligned16_sse2(float* outputVector, cons \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_32s_convert_32f_aligned16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_a16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int 
num_points){ float* outputVectorPtr = outputVector; const int32_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -70,4 +70,4 @@ static inline void volk_32s_convert_32f_aligned16_generic(float* outputVector, c -#endif /* INCLUDED_VOLK_32s_CONVERT_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_32i_s32f_convert_32f_a16_H */ diff --git a/volk/include/volk/volk_32s_convert_32f_unaligned16.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h index ad7d4eb17..1dd6422f8 100644 --- a/volk/include/volk/volk_32s_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H +#define INCLUDED_volk_32i_s32f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -15,7 +15,7 @@ \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_32s_convert_32f_unaligned16_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -57,7 +57,7 @@ static inline void volk_32s_convert_32f_unaligned16_sse2(float* outputVector, co \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_32s_convert_32f_unaligned16_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_32i_s32f_convert_32f_u_generic(float* outputVector, const int32_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int32_t* inputVectorPtr = inputVector; unsigned int number 
= 0; @@ -72,4 +72,4 @@ static inline void volk_32s_convert_32f_unaligned16_generic(float* outputVector, -#endif /* INCLUDED_VOLK_32s_CONVERT_32f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */ diff --git a/volk/include/volk/volk_32s_and_aligned16.h b/volk/include/volk/volk_32i_x2_and_32i_a16.h index e9f1e3a43..3baa1d856 100644 --- a/volk/include/volk/volk_32s_and_aligned16.h +++ b/volk/include/volk/volk_32i_x2_and_32i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_AND_ALIGNED16_H -#define INCLUDED_VOLK_32s_AND_ALIGNED16_H +#ifndef INCLUDED_volk_32i_x2_and_32i_a16_H +#define INCLUDED_volk_32i_x2_and_32i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32s_and_aligned16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32s_and_aligned16_sse(int32_t* cVector, const int32_t* a \param bVector One of the vectors \param num_points The number of values in aVector and bVector to be anded together and stored into cVector */ -static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_ } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Ands the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors + \param bVector One of the vectors + \param num_points The number of values in aVector and bVector to be anded together and stored into cVector +*/ +extern void volk_32i_x2_and_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_and_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_and_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32s_AND_ALIGNED16_H */ +#endif /* INCLUDED_volk_32i_x2_and_32i_a16_H */ diff --git a/volk/include/volk/volk_32s_or_aligned16.h b/volk/include/volk/volk_32i_x2_or_32i_a16.h index f4c427c4d..0be22f00a 100644 --- a/volk/include/volk/volk_32s_or_aligned16.h +++ b/volk/include/volk/volk_32i_x2_or_32i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32s_OR_ALIGNED16_H -#define INCLUDED_VOLK_32s_OR_ALIGNED16_H +#ifndef INCLUDED_volk_32i_x2_or_32i_a16_H +#define INCLUDED_volk_32i_x2_or_32i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to be ored together and stored into cVector */ -static inline void volk_32s_or_aligned16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -51,7 +51,7 @@ static inline void volk_32s_or_aligned16_sse(int32_t* cVector, const int32_t* aV \param bVector One of the vectors to be ored \param num_points The number of values in aVector and bVector to 
be ored together and stored into cVector */ -static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ +static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ int32_t* cPtr = cVector; const int32_t* aPtr = aVector; const int32_t* bPtr= bVector; @@ -63,7 +63,19 @@ static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Ors the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be ored + \param bVector One of the vectors to be ored + \param num_points The number of values in aVector and bVector to be ored together and stored into cVector +*/ +extern void volk_32i_x2_or_32i_a16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32i_x2_or_32i_a16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32i_x2_or_32i_a16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ - -#endif /* INCLUDED_VOLK_32s_OR_ALIGNED16_H */ +#endif /* INCLUDED_volk_32i_x2_or_32i_a16_H */ diff --git a/volk/include/volk/volk_32u_byteswap_aligned16.h b/volk/include/volk/volk_32u_byteswap_a16.h index 09173a9d5..7556ec7b1 100644 --- a/volk/include/volk/volk_32u_byteswap_aligned16.h +++ b/volk/include/volk/volk_32u_byteswap_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H -#define INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H +#ifndef INCLUDED_volk_32u_byteswap_a16_H +#define INCLUDED_volk_32u_byteswap_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static 
inline void volk_32u_byteswap_aligned16_sse2(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int num_points){ unsigned int number = 0; uint32_t* inputPtr = intsToSwap; @@ -57,7 +57,7 @@ static inline void volk_32u_byteswap_aligned16_sse2(uint32_t* intsToSwap, unsign \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_32u_byteswap_aligned16_generic(uint32_t* intsToSwap, unsigned int num_points){ +static inline void volk_32u_byteswap_a16_generic(uint32_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = intsToSwap; unsigned int point; @@ -74,4 +74,4 @@ static inline void volk_32u_byteswap_aligned16_generic(uint32_t* intsToSwap, uns -#endif /* INCLUDED_VOLK_32u_BYTESWAP_ALIGNED16_H */ +#endif /* INCLUDED_volk_32u_byteswap_a16_H */ diff --git a/volk/include/volk/volk_32u_popcnt_aligned16.h b/volk/include/volk/volk_32u_popcnt_a16.h index 37cfd112c..f6e25e4e8 100644 --- a/volk/include/volk/volk_32u_popcnt_aligned16.h +++ b/volk/include/volk/volk_32u_popcnt_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H -#define INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H +#ifndef INCLUDED_VOLK_32u_POPCNT_A16_H +#define INCLUDED_VOLK_32u_POPCNT_A16_H #include <stdio.h> #include <inttypes.h> @@ -7,7 +7,7 @@ #if LV_HAVE_GENERIC -static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) { // This is faster than a lookup table uint32_t retVal = value; @@ -27,10 +27,10 @@ static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32 #include <nmmintrin.h> -static inline void volk_32u_popcnt_aligned16_sse4_2(uint32_t* ret, const uint32_t value) { +static inline void volk_32u_popcnt_a16_sse4_2(uint32_t* ret, const uint32_t value) { *ret = _mm_popcnt_u32(value); } #endif /*LV_HAVE_SSE4_2*/ 
-#endif /*INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H*/ +#endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/ diff --git a/volk/include/volk/volk_64f_convert_32f_aligned16.h b/volk/include/volk/volk_64f_convert_32f_a16.h index 44df66104..7dca065f0 100644 --- a/volk/include/volk/volk_64f_convert_32f_aligned16.h +++ b/volk/include/volk/volk_64f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_64f_convert_32f_a16_H +#define INCLUDED_volk_64f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_aligned16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_aligned16_sse2(float* outputVector, cons \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_aligned16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_a16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ float* outputVectorPtr = outputVector; const double* inputVectorPtr = inputVector; unsigned int number = 0; @@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_aligned16_generic(float* outputVector, c -#endif /* INCLUDED_VOLK_64f_CONVERT_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_64f_convert_32f_a16_H */ diff --git a/volk/include/volk/volk_64f_convert_32f_unaligned16.h b/volk/include/volk/volk_64f_convert_32f_u.h index 
08cfb6127..6338c1433 100644 --- a/volk/include/volk/volk_64f_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_64f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_64f_convert_32f_u_H +#define INCLUDED_volk_64f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_unaligned16_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -50,7 +50,7 @@ static inline void volk_64f_convert_32f_unaligned16_sse2(float* outputVector, co \param fVector The double vector values to be converted \param num_points The number of points in the two vectors to be converted */ -static inline void volk_64f_convert_32f_unaligned16_generic(float* outputVector, const double* inputVector, unsigned int num_points){ +static inline void volk_64f_convert_32f_u_generic(float* outputVector, const double* inputVector, unsigned int num_points){ float* outputVectorPtr = outputVector; const double* inputVectorPtr = inputVector; unsigned int number = 0; @@ -64,4 +64,4 @@ static inline void volk_64f_convert_32f_unaligned16_generic(float* outputVector, -#endif /* INCLUDED_VOLK_64f_CONVERT_32f_UNALIGNED16_H */ +#endif /* INCLUDED_volk_64f_convert_32f_u_H */ diff --git a/volk/include/volk/volk_64f_max_aligned16.h b/volk/include/volk/volk_64f_x2_max_64f_a16.h index ce4907a8c..4b0c1f5f1 100644 --- a/volk/include/volk/volk_64f_max_aligned16.h +++ b/volk/include/volk/volk_64f_x2_max_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_MAX_ALIGNED16_H -#define 
INCLUDED_VOLK_64f_MAX_ALIGNED16_H +#ifndef INCLUDED_volk_64f_x2_max_64f_a16_H +#define INCLUDED_volk_64f_x2_max_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_max_aligned16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -53,7 +53,7 @@ static inline void volk_64f_max_aligned16_sse2(double* cVector, const double* aV \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_max_aligned16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_max_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_max_aligned16_generic(double* cVector, const double* #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_VOLK_64f_MAX_ALIGNED16_H */ +#endif /* INCLUDED_volk_64f_x2_max_64f_a16_H */ diff --git a/volk/include/volk/volk_64f_min_aligned16.h b/volk/include/volk/volk_64f_x2_min_64f_a16.h index acf4d6b2a..aa961e384 100644 --- a/volk/include/volk/volk_64f_min_aligned16.h +++ b/volk/include/volk/volk_64f_x2_min_64f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64f_MIN_ALIGNED16_H -#define INCLUDED_VOLK_64f_MIN_ALIGNED16_H +#ifndef INCLUDED_volk_64f_x2_min_64f_a16_H +#define INCLUDED_volk_64f_x2_min_64f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param 
bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_min_aligned16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int halfPoints = num_points / 2; @@ -53,7 +53,7 @@ static inline void volk_64f_min_aligned16_sse2(double* cVector, const double* aV \param bVector The vector to be checked \param num_points The number of values in aVector and bVector to be checked and stored into cVector */ -static inline void volk_64f_min_aligned16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ +static inline void volk_64f_x2_min_64f_a16_generic(double* cVector, const double* aVector, const double* bVector, unsigned int num_points){ double* cPtr = cVector; const double* aPtr = aVector; const double* bPtr= bVector; @@ -68,4 +68,4 @@ static inline void volk_64f_min_aligned16_generic(double* cVector, const double* #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_VOLK_64f_MIN_ALIGNED16_H */ +#endif /* INCLUDED_volk_64f_x2_min_64f_a16_H */ diff --git a/volk/include/volk/volk_64u_byteswap_aligned16.h b/volk/include/volk/volk_64u_byteswap_a16.h index d5e1b6f30..0eefe0138 100644 --- a/volk/include/volk/volk_64u_byteswap_aligned16.h +++ b/volk/include/volk/volk_64u_byteswap_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H -#define INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H +#ifndef INCLUDED_volk_64u_byteswap_a16_H +#define INCLUDED_volk_64u_byteswap_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_aligned16_sse2(uint64_t* intsToSwap, unsigned int 
num_points){ +static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; __m128i input, byte1, byte2, byte3, byte4, output; __m128i byte2mask = _mm_set1_epi32(0x00FF0000); @@ -65,7 +65,7 @@ static inline void volk_64u_byteswap_aligned16_sse2(uint64_t* intsToSwap, unsign \param intsToSwap The vector of data to byte swap \param numDataPoints The number of data points */ -static inline void volk_64u_byteswap_aligned16_generic(uint64_t* intsToSwap, unsigned int num_points){ +static inline void volk_64u_byteswap_a16_generic(uint64_t* intsToSwap, unsigned int num_points){ uint32_t* inputPtr = (uint32_t*)intsToSwap; unsigned int point; for(point = 0; point < num_points; point++){ @@ -85,4 +85,4 @@ static inline void volk_64u_byteswap_aligned16_generic(uint64_t* intsToSwap, uns -#endif /* INCLUDED_VOLK_64u_BYTESWAP_ALIGNED16_H */ +#endif /* INCLUDED_volk_64u_byteswap_a16_H */ diff --git a/volk/include/volk/volk_64u_popcnt_aligned16.h b/volk/include/volk/volk_64u_popcnt_a16.h index 4d62f9375..59511dc29 100644 --- a/volk/include/volk/volk_64u_popcnt_aligned16.h +++ b/volk/include/volk/volk_64u_popcnt_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H -#define INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H +#ifndef INCLUDED_volk_64u_popcnt_a16_H +#define INCLUDED_volk_64u_popcnt_a16_H #include <stdio.h> #include <inttypes.h> @@ -8,7 +8,7 @@ #if LV_HAVE_GENERIC -static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) { const uint32_t* valueVector = (const uint32_t*)&value; @@ -40,11 +40,11 @@ static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64 #include <nmmintrin.h> -static inline void volk_64u_popcnt_aligned16_sse4_2(uint64_t* ret, const uint64_t value) { +static inline void volk_64u_popcnt_a16_sse4_2(uint64_t* ret, const uint64_t value) { *ret 
= _mm_popcnt_u64(value); } #endif /*LV_HAVE_SSE4_2*/ -#endif /*INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H*/ +#endif /*INCLUDED_volk_64u_popcnt_a16_H*/ diff --git a/volk/include/volk/volk_8s_convert_16s_aligned16.h b/volk/include/volk/volk_8i_convert_16i_a16.h index 0efe3c6a1..3d7045753 100644 --- a/volk/include/volk/volk_8s_convert_16s_aligned16.h +++ b/volk/include/volk/volk_8i_convert_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED16_H +#ifndef INCLUDED_volk_8i_convert_16i_a16_H +#define INCLUDED_volk_8i_convert_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_16s_aligned16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -54,7 +54,7 @@ static inline void volk_8s_convert_16s_aligned16_sse4_1(int16_t* outputVector, c \param outputVector The 16 bit output data buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -65,6 +65,18 @@ static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Converts the input 8 bit integer data into 16 bit integer data + \param inputVector The 8 bit input data buffer + \param outputVector The 16 bit output data buffer + \param num_points The number of data values to be converted + */ +extern void volk_8i_convert_16i_a16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); +static inline void volk_8i_convert_16i_a16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ + volk_8i_convert_16i_a16_orc_impl(outputVector, inputVector, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8s_convert_16s_unaligned16.h b/volk/include/volk/volk_8i_convert_16i_u.h index 05b916cea..bcff13406 100644 --- a/volk/include/volk/volk_8s_convert_16s_unaligned16.h +++ b/volk/include/volk/volk_8i_convert_16i_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED16_H +#ifndef INCLUDED_volk_8i_convert_16i_u_H +#define INCLUDED_volk_8i_convert_16i_u_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_8s_convert_16s_unaligned16_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -56,7 +56,7 @@ static inline void volk_8s_convert_16s_unaligned16_sse4_1(int16_t* outputVector, \param num_points The number of data values to be converted \note Input and output buffers do NOT need to be properly aligned */ -static inline void volk_8s_convert_16s_unaligned16_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ +static inline void volk_8i_convert_16i_u_generic(int16_t* 
outputVector, const int8_t* inputVector, unsigned int num_points){ int16_t* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; diff --git a/volk/include/volk/volk_8s_convert_32f_aligned16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h index 54b66ef8f..99a24ec10 100644 --- a/volk/include/volk/volk_8s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED16_H +#ifndef INCLUDED_volk_8i_s32f_convert_32f_a16_H +#define INCLUDED_volk_8i_s32f_convert_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_32f_aligned16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -74,7 +74,7 @@ static inline void volk_8s_convert_32f_aligned16_sse4_1(float* outputVector, con \param scalar The value divided against each point in the output buffer \param num_points The number of data values to be converted */ -static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; @@ -86,6 +86,20 @@ static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, co } #endif /* LV_HAVE_GENERIC */ 
+#if LV_HAVE_ORC + /*! + \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 8 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + */ +extern void volk_8i_s32f_convert_32f_a16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); +static inline void volk_8i_s32f_convert_32f_a16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ + float invscalar = 1.0 / scalar; + volk_8i_s32f_convert_32f_a16_orc_impl(outputVector, inputVector, invscalar, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8s_convert_32f_unaligned16.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h index 8019aac9a..1e30957e8 100644 --- a/volk/include/volk/volk_8s_convert_32f_unaligned16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED16_H -#define INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED16_H +#ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H +#define INCLUDED_volk_8i_s32f_convert_32f_u_H #include <inttypes.h> #include <stdio.h> @@ -15,7 +15,7 @@ \param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_8s_convert_32f_unaligned16_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; @@ -76,7 +76,7 @@ static inline void volk_8s_convert_32f_unaligned16_sse4_1(float* outputVector, c 
\param num_points The number of data values to be converted \note Output buffer does NOT need to be properly aligned */ -static inline void volk_8s_convert_32f_unaligned16_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ +static inline void volk_8i_s32f_convert_32f_u_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ float* outputVectorPtr = outputVector; const int8_t* inputVectorPtr = inputVector; unsigned int number = 0; diff --git a/volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h index 38eaa49ea..91c9b2c58 100644 --- a/volk/include/volk/volk_8sc_deinterleave_16s_aligned16.h +++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H +#define INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_16s_aligned16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -59,7 +59,7 @@ static inline void volk_8sc_deinterleave_16s_aligned16_sse4_1(int16_t* iBuffer, \param qBuffer The Q buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int 
num_points){ +static inline void volk_8ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; int16_t* qBufferPtr = qBuffer; @@ -74,4 +74,4 @@ static inline void volk_8sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer, -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_16S_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a16_H */ diff --git a/volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h index d0cb49494..bf3dc20dd 100644 --- a/volk/include/volk/volk_8sc_deinterleave_real_16s_aligned16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a16_H +#define INCLUDED_volk_8ic_deinterleave_real_16i_a16_H #include <inttypes.h> #include <stdio.h> @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_16s_aligned16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -49,7 +49,7 @@ static inline void volk_8sc_deinterleave_real_16s_aligned16_sse4_1(int16_t* iBuf \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_16s_aligned16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void 
volk_8ic_deinterleave_real_16i_a16_generic(int16_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; int16_t* iBufferPtr = iBuffer; @@ -63,4 +63,4 @@ static inline void volk_8sc_deinterleave_real_16s_aligned16_generic(int16_t* iBu -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_16s_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a16_H */ diff --git a/volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h index d84d64568..13de79423 100644 --- a/volk/include/volk/volk_8sc_deinterleave_real_8s_aligned16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h @@ -12,7 +12,7 @@ \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; @@ -50,7 +50,7 @@ static inline void volk_8sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer \param iBuffer The I buffer output data \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ +static inline void volk_8ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (int8_t*)complexVector; int8_t* iBufferPtr = iBuffer; diff --git a/volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h 
b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h index d0c118965..22c3ebb23 100644 --- a/volk/include/volk/volk_8sc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H +#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_32f_aligned16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -84,7 +84,7 @@ static inline void volk_8sc_deinterleave_32f_aligned16_sse4_1(float* iBuffer, fl \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_32f_aligned16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -145,7 +145,7 @@ static inline void volk_8sc_deinterleave_32f_aligned16_sse(float* iBuffer, float \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void 
volk_8sc_deinterleave_32f_aligned16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer, float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; float* qBufferPtr = qBuffer; @@ -161,4 +161,4 @@ static inline void volk_8sc_deinterleave_32f_aligned16_generic(float* iBuffer, f -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_32F_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H */ diff --git a/volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h index c849448ea..5f1430394 100644 --- a/volk/include/volk/volk_8sc_deinterleave_real_32f_aligned16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H -#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H +#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H #include <inttypes.h> #include <stdio.h> @@ -13,7 +13,7 @@ \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -70,7 +70,7 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_sse4_1(float* iBuffe \param scalar The scaling value being multiplied against each data point \param 
num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ float* iBufferPtr = iBuffer; unsigned int number = 0; @@ -115,7 +115,7 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_sse(float* iBuffer, \param scalar The scaling value being multiplied against each data point \param num_points The number of complex data values to be deinterleaved */ -static inline void volk_8sc_deinterleave_real_32f_aligned16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const int8_t* complexVectorPtr = (const int8_t*)complexVector; float* iBufferPtr = iBuffer; @@ -130,4 +130,4 @@ static inline void volk_8sc_deinterleave_real_32f_aligned16_generic(float* iBuff -#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_32f_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */ diff --git a/volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h index 470a67539..d9cacbf46 100644 --- a/volk/include/volk/volk_8sc_multiply_conjugate_16sc_aligned16.h +++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H -#define INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H +#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param bVector 
The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_16sc_aligned16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -76,7 +76,7 @@ static inline void volk_8sc_multiply_conjugate_16sc_aligned16_sse4_1(lv_16sc_t* \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_16sc_aligned16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ +static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ unsigned int number = 0; int16_t* c16Ptr = (int16_t*)cVector; int8_t* a8Ptr = (int8_t*)aVector; @@ -99,4 +99,4 @@ static inline void volk_8sc_multiply_conjugate_16sc_aligned16_generic(lv_16sc_t* -#endif /* INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_16sc_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */ diff --git a/volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h index 52b444cf7..6ec923a4f 100644 --- a/volk/include/volk/volk_8sc_multiply_conjugate_32fc_aligned16.h +++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h @@ -1,5 +1,5 @@ -#ifndef INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H -#define 
INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H +#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H +#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H #include <inttypes.h> #include <stdio.h> @@ -14,7 +14,7 @@ \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_32fc_aligned16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; @@ -95,7 +95,7 @@ static inline void volk_8sc_multiply_conjugate_32fc_aligned16_sse4_1(lv_32fc_t* \param bVector The complex vector which will be converted to complex conjugate and multiplied \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ -static inline void volk_8sc_multiply_conjugate_32fc_aligned16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){ unsigned int number = 0; float* cPtr = (float*)cVector; const float invScalar = 1.0 / scalar; @@ -119,4 +119,4 @@ static inline void volk_8sc_multiply_conjugate_32fc_aligned16_generic(lv_32fc_t* -#endif /* INCLUDED_VOLK_8sc_MULTIPLY_CONJUGATE_32fc_ALIGNED16_H */ +#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_H */ diff --git a/volk/include/volk/volk_register.py 
b/volk/include/volk/volk_register.py index 9fded9a3e..bc8f959af 100755 --- a/volk/include/volk/volk_register.py +++ b/volk/include/volk/volk_register.py @@ -55,7 +55,7 @@ functions = []; for line in mfile: - subline = re.search(".*(aligned).*", line); + subline = re.search(".*_(a16|u)\.h.*", line); if subline: subsubline = re.search("(?<=volk_).*", subline.group(0)); if subsubline: @@ -70,11 +70,10 @@ datatypes = set(datatypes); for line in mfile: for dt in datatypes: if dt in line: - subline = re.search("(volk_" + dt +"_.*(aligned).*\.h)", line); + subline = re.search("(volk_" + dt +"_.*(a16|u).*\.h)", line); if subline: subsubline = re.search(".+(?=\.h)", subline.group(0)); - functions.append(subsubline.group(0)); archs = []; diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 446ff574f..af7c7f335 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -24,18 +24,19 @@ include $(top_srcdir)/Makefile.common # of a hack. Figure out the right way to do this to find built # volk_config.h and volk_tables.h -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ -I$(top_builddir)/include \ $(LV_CXXFLAGS) $(WITH_INCLUDES) -# We build 2 libraries and 1 executable here. One library contains -# everything except the libcppunit QA code, and one contains only the -# libcppunit-based QA code. The C++ QA code is especially recommended +# We build 1 library and 1 executable here. The library contains +# everything except the QA code. The C++ QA code is especially recommended # when you have general purpose C or C++ code that may not get # thoroughly exercised by building and running a GR block. The # executable runs the QA code at "make check" time. # +# +# # N.B., If there's a SWIG generated shared library and associated # python code, it will be contained in ../python, not here. 
(That # code is conditionally built depending on the state of the @@ -44,13 +45,14 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ # list of programs run by "make check" and "make distcheck" -#TESTS = test_all - +#TESTS = testqa +#orc stuff gets built in the ORC directory conditional to ORC being enabled. +#it gets linked in during the build of libvolk as an added library. +#there might be a better way to do this. lib_LTLIBRARIES = \ libvolk.la \ - libvolk_runtime.la \ - libvolk_qa.la + libvolk_runtime.la EXTRA_DIST = \ volk_mktables.c \ @@ -72,221 +74,63 @@ libvolk_la_SOURCES = \ volk.c \ volk_environment_init.c +volk_orc_LDFLAGS = \ + $(ORC_LDFLAGS) \ + -lorc-0.4 -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +volk_orc_LIBADD = \ + ../orc/libvolk_orc.la +if LV_HAVE_ORC +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_la_LIBADD = $(volk_orc_LIBADD) +else +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_la_LIBADD = - +endif # ---------------------------------------------------------------- # The QA library. 
Note libvolk.la in LIBADD # ---------------------------------------------------------------- -libvolk_qa_la_SOURCES = \ - qa_volk.cc \ - qa_16s_quad_max_star_aligned16.cc \ - qa_32fc_dot_prod_aligned16.cc \ - qa_32fc_square_dist_aligned16.cc \ - qa_32fc_square_dist_scalar_mult_aligned16.cc \ - qa_32f_sum_of_poly_aligned16.cc \ - qa_32fc_index_max_aligned16.cc \ - qa_32f_index_max_aligned16.cc \ - qa_32fc_conjugate_dot_prod_aligned16.cc \ - qa_32fc_conjugate_dot_prod_unaligned.cc \ - qa_16s_permute_and_scalar_add_aligned16.cc \ - qa_16s_branch_4_state_8_aligned16.cc \ - qa_16s_max_star_horizontal_aligned16.cc \ - qa_16s_max_star_aligned16.cc \ - qa_16s_add_quad_aligned16.cc \ - qa_32f_add_aligned16.cc \ - qa_32f_subtract_aligned16.cc \ - qa_32f_max_aligned16.cc \ - qa_32f_min_aligned16.cc \ - qa_64f_max_aligned16.cc \ - qa_64f_min_aligned16.cc \ - qa_32s_and_aligned16.cc \ - qa_32s_or_aligned16.cc \ - qa_32f_dot_prod_aligned16.cc \ - qa_32f_dot_prod_unaligned16.cc \ - qa_32f_fm_detect_aligned16.cc \ - qa_32fc_32f_multiply_aligned16.cc \ - qa_32fc_multiply_aligned16.cc \ - qa_32f_divide_aligned16.cc \ - qa_32f_multiply_aligned16.cc \ - qa_32f_sqrt_aligned16.cc \ - qa_8sc_multiply_conjugate_16sc_aligned16.cc \ - qa_8sc_multiply_conjugate_32fc_aligned16.cc \ - qa_32u_popcnt_aligned16.cc \ - qa_64u_popcnt_aligned16.cc \ - qa_64u_byteswap_aligned16.cc \ - qa_8sc_deinterleave_32f_aligned16.cc \ - qa_16sc_deinterleave_32f_aligned16.cc \ - qa_8sc_deinterleave_16s_aligned16.cc \ - qa_32f_interleave_32fc_aligned16.cc \ - qa_16u_byteswap_aligned16.cc \ - qa_16sc_deinterleave_16s_aligned16.cc \ - qa_32fc_deinterleave_real_32f_aligned16.cc \ - qa_32fc_magnitude_32f_aligned16.cc \ - qa_32fc_deinterleave_real_64f_aligned16.cc \ - qa_32fc_deinterleave_real_16s_aligned16.cc \ - qa_32fc_magnitude_16s_aligned16.cc \ - qa_32fc_deinterleave_32f_aligned16.cc \ - qa_8sc_deinterleave_real_8s_aligned16.cc \ - qa_32fc_deinterleave_64f_aligned16.cc \ - qa_32f_interleave_16sc_aligned16.cc \ - 
qa_16sc_deinterleave_real_8s_aligned16.cc \ - qa_16sc_deinterleave_real_32f_aligned16.cc \ - qa_16sc_magnitude_32f_aligned16.cc \ - qa_32u_byteswap_aligned16.cc \ - qa_16sc_deinterleave_real_16s_aligned16.cc \ - qa_8sc_deinterleave_real_32f_aligned16.cc \ - qa_16sc_magnitude_16s_aligned16.cc \ - qa_32f_normalize_aligned16.cc \ - qa_8sc_deinterleave_real_16s_aligned16.cc \ - qa_16s_convert_32f_aligned16.cc \ - qa_16s_convert_32f_unaligned16.cc \ - qa_16s_convert_8s_aligned16.cc \ - qa_16s_convert_8s_unaligned16.cc \ - qa_32f_convert_16s_aligned16.cc \ - qa_32f_convert_16s_unaligned16.cc \ - qa_32f_convert_32s_aligned16.cc \ - qa_32f_convert_32s_unaligned16.cc \ - qa_32f_convert_64f_aligned16.cc \ - qa_32f_convert_64f_unaligned16.cc \ - qa_32f_convert_8s_aligned16.cc \ - qa_32f_convert_8s_unaligned16.cc \ - qa_32s_convert_32f_aligned16.cc \ - qa_32s_convert_32f_unaligned16.cc \ - qa_64f_convert_32f_aligned16.cc \ - qa_64f_convert_32f_unaligned16.cc \ - qa_8s_convert_16s_aligned16.cc \ - qa_8s_convert_16s_unaligned16.cc \ - qa_8s_convert_32f_aligned16.cc \ - qa_8s_convert_32f_unaligned16.cc \ - qa_32fc_32f_power_32fc_aligned16.cc \ - qa_32f_power_aligned16.cc \ - qa_32fc_atan2_32f_aligned16.cc \ - qa_32fc_power_spectral_density_32f_aligned16.cc \ - qa_32fc_power_spectrum_32f_aligned16.cc \ - qa_32f_calc_spectral_noise_floor_aligned16.cc \ - qa_32f_accumulator_aligned16.cc \ - qa_32f_stddev_aligned16.cc \ - qa_32f_stddev_and_mean_aligned16.cc +#libvolk_qa_la_SOURCES = \ +# qa_utils.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost -libvolk_qa_la_LIBADD = \ - libvolk.la \ - libvolk_runtime.la \ - $(CPPUNIT_LIBS) +#libvolk_qa_la_LIBADD = \ +# libvolk.la \ +# libvolk_runtime.la # ---------------------------------------------------------------- # headers that don't get installed # ---------------------------------------------------------------- noinst_HEADERS = \ volk_init.h \ - qa_volk.h \ 
- gcc_x86_cpuid.h \ - qa_16s_quad_max_star_aligned16.h \ - qa_32fc_dot_prod_aligned16.h \ - qa_32fc_square_dist_aligned16.h \ - qa_32fc_square_dist_scalar_mult_aligned16.h \ - qa_32f_sum_of_poly_aligned16.h \ - qa_32fc_index_max_aligned16.h \ - qa_32f_index_max_aligned16.h \ - qa_32fc_conjugate_dot_prod_aligned16.h \ - qa_32fc_conjugate_dot_prod_unaligned.h \ - qa_16s_permute_and_scalar_add_aligned16.h \ - qa_16s_branch_4_state_8_aligned16.h \ - qa_16s_max_star_horizontal_aligned16.h \ - qa_16s_max_star_aligned16.h \ - qa_16s_add_quad_aligned16.h \ - qa_32f_add_aligned16.h \ - qa_32f_subtract_aligned16.h \ - qa_32f_max_aligned16.h \ - qa_32f_min_aligned16.h \ - qa_64f_max_aligned16.h \ - qa_64f_min_aligned16.h \ - qa_32s_and_aligned16.h \ - qa_32s_or_aligned16.h \ - qa_32f_dot_prod_aligned16.h \ - qa_32f_dot_prod_unaligned16.h \ - qa_32f_fm_detect_aligned16.h \ - qa_32fc_32f_multiply_aligned16.h \ - qa_32fc_multiply_aligned16.h \ - qa_32f_divide_aligned16.h \ - qa_32f_multiply_aligned16.h \ - qa_32f_sqrt_aligned16.h \ - qa_8sc_multiply_conjugate_16sc_aligned16.h \ - qa_8sc_multiply_conjugate_32fc_aligned16.h \ - qa_32u_popcnt_aligned16.h \ - qa_64u_popcnt_aligned16.h \ - qa_64u_byteswap_aligned16.h \ - qa_8sc_deinterleave_32f_aligned16.h \ - qa_16sc_deinterleave_32f_aligned16.h \ - qa_8sc_deinterleave_16s_aligned16.h \ - qa_32f_interleave_32fc_aligned16.h \ - qa_16u_byteswap_aligned16.h \ - qa_16sc_deinterleave_16s_aligned16.h \ - qa_32fc_deinterleave_real_32f_aligned16.h \ - qa_32fc_magnitude_32f_aligned16.h \ - qa_32fc_deinterleave_real_64f_aligned16.h \ - qa_32fc_deinterleave_real_16s_aligned16.h \ - qa_32fc_magnitude_16s_aligned16.h \ - qa_32fc_deinterleave_32f_aligned16.h \ - qa_8sc_deinterleave_real_8s_aligned16.h \ - qa_32fc_deinterleave_64f_aligned16.h \ - qa_32f_interleave_16sc_aligned16.h \ - qa_16sc_deinterleave_real_8s_aligned16.h \ - qa_16sc_deinterleave_real_32f_aligned16.h \ - qa_16sc_magnitude_32f_aligned16.h \ - qa_32u_byteswap_aligned16.h \ - 
qa_16sc_deinterleave_real_16s_aligned16.h \ - qa_8sc_deinterleave_real_32f_aligned16.h \ - qa_16sc_magnitude_16s_aligned16.h \ - qa_32f_normalize_aligned16.h \ - qa_8sc_deinterleave_real_16s_aligned16.h \ - qa_16s_convert_32f_aligned16.h \ - qa_16s_convert_32f_unaligned16.h \ - qa_16s_convert_8s_aligned16.h \ - qa_16s_convert_8s_unaligned16.h \ - qa_32f_convert_16s_aligned16.h \ - qa_32f_convert_16s_unaligned16.h \ - qa_32f_convert_32s_aligned16.h \ - qa_32f_convert_32s_unaligned16.h \ - qa_32f_convert_64f_aligned16.h \ - qa_32f_convert_64f_unaligned16.h \ - qa_32f_convert_8s_aligned16.h \ - qa_32f_convert_8s_unaligned16.h \ - qa_32s_convert_32f_aligned16.h \ - qa_32s_convert_32f_unaligned16.h \ - qa_64f_convert_32f_aligned16.h \ - qa_64f_convert_32f_unaligned16.h \ - qa_8s_convert_16s_aligned16.h \ - qa_8s_convert_16s_unaligned16.h \ - qa_8s_convert_32f_aligned16.h \ - qa_8s_convert_32f_unaligned16.h \ - qa_32fc_32f_power_32fc_aligned16.h \ - qa_32f_power_aligned16.h \ - qa_32fc_atan2_32f_aligned16.h \ - qa_32fc_power_spectral_density_32f_aligned16.h \ - qa_32fc_power_spectrum_32f_aligned16.h \ - qa_32f_calc_spectral_noise_floor_aligned16.h \ - qa_32f_accumulator_aligned16.h \ - qa_32f_stddev_aligned16.h \ - qa_32f_stddev_and_mean_aligned16.h - + qa_utils.h \ + assembly.h # ---------------------------------------------------------------- # Our test program # ---------------------------------------------------------------- noinst_PROGRAMS = \ - test_all - -test_all_SOURCES = test_all.cc -test_all_LDADD = libvolk.la libvolk_runtime.la libvolk_qa.la + testqa +testqa_SOURCES = testqa.cc qa_utils.cc +testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) +testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB) +if LV_HAVE_ORC +testqa_LDADD = \ + libvolk.la \ + libvolk_runtime.la \ + ../orc/libvolk_orc.la +else +testqa_LDADD = \ + libvolk.la \ + libvolk_runtime.la +endif distclean-local: rm -f volk.c diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc 
b/volk/lib/qa_16s_convert_32f_aligned16.cc deleted file mode 100644 index 6215f4a64..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16s_convert_32f_aligned16.h> -#include <volk/volk_16s_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_16s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", 
input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h deleted file mode 100644 index ef813d96f..000000000 --- a/volk/lib/qa_16s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc deleted file mode 100644 index 46c2e48ac..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16s_convert_32f_unaligned16.h> -#include <volk/volk_16s_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE - -void qa_16s_convert_32f_unaligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h deleted file mode 100644 index aeb04f770..000000000 --- a/volk/lib/qa_16s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc deleted file mode 100644 index 8225aa0cf..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16s_convert_8s_aligned16.h> -#include <volk/volk_16s_convert_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h deleted file mode 100644 index 2e409d0cc..000000000 --- a/volk/lib/qa_16s_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc deleted file mode 100644 index e6ce5030e..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16s_convert_8s_unaligned16.h> -#include <volk/volk_16s_convert_8s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_16s_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16s_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int16_t input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("16s_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h deleted file mode 100644 index 4b2fe9e42..000000000 --- a/volk/lib/qa_16s_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc deleted file mode 100644 index c6f828ba6..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include <volk/volk.h> -#include <qa_16s_max_star_aligned16.h> -#include <volk/volk_16s_max_star_aligned16.h> -#include <cstdlib> -#include <ctime> -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - - -void qa_16s_max_star_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 6400; - const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[1] __attribute__ ((aligned (16))); - - short output1[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } -} - -#endif diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h deleted file mode 100644 index 119f87c4d..000000000 --- a/volk/lib/qa_16s_max_star_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_max_star_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */ diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc deleted file mode 100644 index 0a58570e2..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16s_max_star_horizontal_aligned16.h> -#include <volk/volk_16s_max_star_horizontal_aligned16.h> -#include <cstdlib> -#include <ctime> -//test for ssse3 - -#ifndef LV_HAVE_SSSE3 - -void qa_16s_max_star_horizontal_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - - -void qa_16s_max_star_horizontal_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 32; - const int ITERS = 1; - short input0[vlen] __attribute__ ((aligned (16))); - short output0[vlen>>1] __attribute__ ((aligned (16))); - - short output1[vlen>>1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - short plus0 = ((short) (rand() - (RAND_MAX/2))); - - short minus0 = ((short) (rand() - (RAND_MAX/2))); - - input0[i] = plus0 - minus0; - - } - printf("16s_max_star_horizontal_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic"); - volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen); - /* volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3"); - volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/ - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < (vlen >> 1); ++i) { - // printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - - } - for(int i = 0; i < (vlen >> 1); ++i) { - - CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); - } - } - - -#endif - diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h deleted file mode 100644 index 9f9757253..000000000 --- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H -#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index c775e8596..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,77 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_16s_aligned16.h> -#include <volk/volk_16sc_deinterleave_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_sse21[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse21[i]); - - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 995ab5b34..000000000 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index b25094e89..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_32f_aligned16.h> -#include <volk/volk_16sc_deinterleave_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16sc_deinterleave_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - float output_sse21[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h deleted file mode 100644 index fea3b6c2d..000000000 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index c67064ea6..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_real_16s_aligned16.h> -#include <volk/volk_16sc_deinterleave_real_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0)); - } - printf("16sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - // printf("%d = generic... %d, sse2... %d, ssse3... 
%d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index ebb70b97a..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index f86f03b88..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,124 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_16sc_deinterleave_real_32f_aligned16.h> -#include <volk/volk_16sc_deinterleave_real_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE */ - -#else - -void qa_16sc_deinterleave_real_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* SSE4_1 */ diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index e83426473..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index dd446567e..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_deinterleave_real_8s_aligned16.h> -#include <volk/volk_16sc_deinterleave_real_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0); - } - printf("16sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - } -} - -#endif diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 04e5511e5..000000000 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc deleted file mode 100644 index 9799ef43b..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_magnitude_16s_aligned16.h> -#include <volk/volk_16sc_magnitude_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_16sc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* loadInput = (int16_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } - printf("16sc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h deleted file mode 100644 index 4664b70f4..000000000 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc deleted file mode 100644 index 1ebe644c5..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,111 +0,0 @@ -#include <volk/volk.h> -#include <qa_16sc_magnitude_32f_aligned16.h> -#include <volk/volk_16sc_magnitude_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_16sc_magnitude_32f_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2)); - } - printf("16sc_magnitude_32f_aligned\n"); - - float scale = 32768.0; - for(int i = 0; i < vlen; ++i) { - float re = 
(float)(input0[i].real())/scale; - float im = (float)(input0[i].imag())/scale; - output_known[i] = sqrt(re*re + im*im); - } - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 100; ++i) { - printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag()); - printf("generic... %f == %f\n", output_generic[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); - } -} - -#else - -void qa_16sc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - int16_t* inputLoad = (int16_t*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("16sc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count 
= 0; count < ITERS; ++count) { - volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h deleted file mode 100644 index 0c25673ea..000000000 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc deleted file mode 100644 index ea117a820..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_16u_byteswap_aligned16.h> -#include <volk/volk_16u_byteswap_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_16u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_16u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint16_t output0[vlen] __attribute__ ((aligned (16))); - uint16_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint16_t)); - - printf("16u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h deleted file mode 100644 index e11b23e3f..000000000 --- a/volk/lib/qa_16u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_16u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc deleted file mode 100644 index 0defef283..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_accumulator_aligned16.h> -#include <volk/volk_32f_accumulator_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_accumulator_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_accumulator_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float accumulator_generic; - float accumulator_sse; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_accumulator_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4); -} - -#endif diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h deleted file mode 100644 index 0004d3ff0..000000000 --- a/volk/lib/qa_32f_accumulator_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H -#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_accumulator_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc deleted file mode 100644 index f80d562d4..000000000 --- a/volk/lib/qa_32f_add_aligned16.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. 
- */ - -#include <volk/volk.h> -#include <qa_32f_add_aligned16.h> -#include <volk/volk_32f_add_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_add_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - output_known[i] = input0[i] + input1[i]; - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_add_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_add_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h deleted file mode 100644 index 58e2a151c..000000000 --- a/volk/lib/qa_32f_add_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H -#define INCLUDED_QA_32F_ADD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_add_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_add_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc deleted file mode 100644 index 5d6987333..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_calc_spectral_noise_floor_aligned16.h> -#include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h> -#include <cstdlib> -#include <math.h> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_calc_spectral_noise_floor_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[1] __attribute__ ((aligned (16))); - float output01[1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_calc_spectral_noise_floor_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < 1; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h deleted file mode 100644 index c5dce2c4b..000000000 --- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H -#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc deleted file mode 100644 index 3e2452e68..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_16s_aligned16.h> -#include <volk/volk_32f_convert_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %d, sse... %d sse2... 
%d\n", i, output_generic[i], output_sse[i], output_sse2[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h deleted file mode 100644 index fce1eb417..000000000 --- a/volk/lib/qa_32f_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc deleted file mode 100644 index e016b7ff7..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_16s_unaligned16.h> -#include <volk/volk_32f_convert_16s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_16s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_16s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h deleted file mode 100644 index 492bc80e6..000000000 --- a/volk/lib/qa_32f_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc deleted file mode 100644 index abceb52fb..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_32s_aligned16.h> -#include <volk/volk_32f_convert_32s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_32s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h deleted file mode 100644 index 97d854463..000000000 --- a/volk/lib/qa_32f_convert_32s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc deleted file mode 100644 index 90f84b56f..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_32s_unaligned16.h> -#include <volk/volk_32f_convert_32s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_32s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_32s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int32_t output_generic[vlen] __attribute__ ((aligned (16))); - int32_t output_sse[vlen] __attribute__ ((aligned (16))); - int32_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_32s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h deleted file mode 100644 index 5d662d86d..000000000 --- a/volk/lib/qa_32f_convert_32s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc deleted file mode 100644 index 1d0754ac9..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_64f_aligned16.h> -#include <volk/volk_32f_convert_64f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h deleted file mode 100644 index 41eb3e094..000000000 --- a/volk/lib/qa_32f_convert_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc deleted file mode 100644 index 6f7d5066d..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_64f_unaligned16.h> -#include <volk/volk_32f_convert_64f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_64f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_64f_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_64f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h deleted file mode 100644 index 4b144f033..000000000 --- a/volk/lib/qa_32f_convert_64f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc deleted file mode 100644 index 6a53629b5..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_8s_aligned16.h> -#include <volk/volk_32f_convert_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h deleted file mode 100644 index 68a523f34..000000000 --- a/volk/lib/qa_32f_convert_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc deleted file mode 100644 index fbc5c20e6..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_convert_8s_unaligned16.h> -#include <volk/volk_32f_convert_8s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32f_convert_8s_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_convert_8s_unaligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_sse[vlen] __attribute__ ((aligned (16))); - int8_t output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_convert_8s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1); - CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1); - } -} - -#endif diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h deleted file mode 100644 index 88d4ff42a..000000000 --- a/volk/lib/qa_32f_convert_8s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H -#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc deleted file mode 100644 index 3257a3751..000000000 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. 
- */ - -#include <volk/volk.h> -#include <qa_32f_divide_aligned16.h> -#include <volk/volk_32f_divide_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_divide_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - output_known[i] = input0[i] / input1[i]; - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_divide_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_divide_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h deleted file mode 100644 index 79d5ae4b8..000000000 --- a/volk/lib/qa_32f_divide_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H -#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_divide_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc deleted file mode 100644 index 98c1f2d99..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.cc +++ /dev/null @@ -1,183 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_dot_prod_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_aligned16::t1() { - const int vlen = 2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = 
posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_aligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... 
sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h deleted file mode 100644 index 6931a9e98..000000000 --- a/volk/lib/qa_32f_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc deleted file mode 100644 index 8e97d4249..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.cc +++ /dev/null @@ -1,190 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_dot_prod_unaligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifndef LV_HAVE_SSE4_1 - -#ifdef LV_HAVE_SSE3 -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 
2046; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen* sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f ... sse: %f ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]); - - for(i = 0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse); - free(result_sse3); - -} -#else -void qa_32f_dot_prod_unaligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - -#else - -void qa_32f_dot_prod_unaligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 4095; - const int ITER = 100000; - - int i; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float * input; - float * taps; - - float * result_generic; - float * result_sse; - float * result_sse3; - float * result_sse4_1; - - ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float)); - - input = &input[1]; // Make sure the buffer is unaligned - taps = &taps[1]; // Make sure the buffer is unaligned - - random_floats((float*)input, vlen); - random_floats((float*)taps, vlen); - - printf("32f_dot_prod_unaligned16\n"); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - start = clock(); - for(i = 0; i < ITER; i++){ - get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - //printf("generic: %f ... sse: %f ... sse3 %f ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]); - for(i =0; i < ITER; i++){ - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA); - } - - free(&input[-1]); - free(&taps[-1]); - free(result_generic); - free(result_sse); - free(result_sse3); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h deleted file mode 100644 index e8bad07fe..000000000 --- a/volk/lib/qa_32f_dot_prod_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H -#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc deleted file mode 100644 index a7ae60780..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_interleave_16sc_aligned16.h> -#include <volk/volk_32f_interleave_16sc_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32f_interleave_16sc_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_16sc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex<int16_t> output_generic[vlen] __attribute__ ((aligned (16))); - std::complex<int16_t> output_sse[vlen] __attribute__ ((aligned (16))); - std::complex<int16_t> output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_interleave_16sc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h deleted file mode 100644 index 8d2914817..000000000 --- a/volk/lib/qa_32f_interleave_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc deleted file mode 100644 index 333b6fce8..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_interleave_32fc_aligned16.h> -#include <volk/volk_32f_interleave_32fc_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_interleave_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_interleave_32fc_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - std::complex<float> output_generic[vlen] __attribute__ ((aligned (16))); - std::complex<float> output_sse[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_interleave_32fc_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h deleted file mode 100644 index cba518d37..000000000 --- a/volk/lib/qa_32f_interleave_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H -#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc deleted file mode 100644 index ceb913cb4..000000000 --- a/volk/lib/qa_32f_max_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_max_aligned16.h> -#include <volk/volk_32f_max_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_max_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h deleted file mode 100644 index d535479f4..000000000 --- a/volk/lib/qa_32f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H -#define INCLUDED_QA_32F_MAX_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc deleted file mode 100644 index 580a60e7d..000000000 --- a/volk/lib/qa_32f_min_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_min_aligned16.h> -#include <volk/volk_32f_min_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_min_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h deleted file mode 100644 index 90961ac92..000000000 --- a/volk/lib/qa_32f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H -#define INCLUDED_QA_32F_MIN_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc deleted file mode 100644 index 0c242b649..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. 
- */ - -#include <volk/volk.h> -#include <qa_32f_multiply_aligned16.h> -#include <volk/volk_32f_multiply_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_multiply_aligned16::t1() { - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - output_known[i] = input0[i] * input1[i]; - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f, %f\n", input0[i], input1[i]); - printf("generic... 
%f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); - } -} - -#else - -void qa_32f_multiply_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_multiply_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h deleted file mode 100644 index 7032a2ad4..000000000 --- a/volk/lib/qa_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc deleted file mode 100644 index 1c7b485a6..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ /dev/null @@ -1,66 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_normalize_aligned16.h> -#include <volk/volk_32f_normalize_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_normalize_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_normalize_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 320001; - const int ITERS = 100; - - float* output0; - float* output01; - ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); - ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); - - for(int i = 0; i < vlen; ++i) { - output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(float)); - printf("32f_normalize_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - // printf("%e...%e\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } - - free(output0); - free(output01); -} - -#endif diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h deleted file mode 100644 index 7c421eb82..000000000 --- a/volk/lib/qa_32f_normalize_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H -#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_normalize_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc deleted file mode 100644 index 1b331daeb..000000000 --- a/volk/lib/qa_32f_power_aligned16.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_power_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32f_power_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - float* input; - int i; - - float* result_generic; - float* result_sse; - float* result_sse4_1; - - ret = posix_memalign((void**)&input, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float)); - ret = 
posix_memalign((void**)&result_sse, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float)); - - random_floats((float*)input, vlen); - - const float power = 3; - - printf("32f_power_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_power_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - - for(i = 0; i < vlen; i++){ - //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA); - CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32f_power_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h deleted file mode 100644 index d45df4e56..000000000 --- a/volk/lib/qa_32f_power_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H -#define INCLUDED_QA_32F_POWER_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_power_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_power_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc deleted file mode 100644 index 62d55767a..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2010 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. - */ - -#include <volk/volk.h> -#include <qa_32f_sqrt_aligned16.h> -#include <volk/volk_32f_sqrt_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_sqrt_aligned16::t1() { - printf("sse not available... 
no test performed\n"); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output_known[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX)); - output_known[i] = sqrt(input0[i]); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - /* - for(int i = 0; i < 10; ++i) { - printf("inputs: %f\n", input0[i]); - printf("generic... %f == %f\n", output0[i], output_known[i]); - } - */ - - for(int i = 0; i < vlen; ++i) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4); - } -} - -#else - -void qa_32f_sqrt_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - // No reason to test negative numbers because they result in NaN. 
- for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX)); - } - printf("32f_sqrt_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h deleted file mode 100644 index e4b99d981..000000000 --- a/volk/lib/qa_32f_sqrt_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H -#define INCLUDED_QA_32F_SQRT_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_sqrt_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc deleted file mode 100644 index 5934d70df..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_stddev_aligned16.h> -#include <volk/volk_32f_stddev_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef 
LV_HAVE_SSE - -void qa_32f_stddev_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32f_stddev_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean = 0; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - mean += input0[i]; - } - mean /= static_cast<float>(vlen); - - printf("32f_stddev_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h deleted file mode 100644 index 7f8d7a5fc..000000000 --- a/volk/lib/qa_32f_stddev_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_stddev_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc deleted file mode 100644 index 78c701d78..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32f_stddev_and_mean_aligned16.h> -#include <volk/volk_32f_stddev_and_mean_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_stddev_and_mean_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_stddev_and_mean_aligned16::t1() { - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - - float stddev_generic; - float stddev_sse; - float stddev_sse4_1; - float mean_generic; - float mean_sse; - float mean_sse4_1; - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_stddev_and_mean_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4); - -} - -#endif diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h deleted file mode 100644 index e08bd249a..000000000 --- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H -#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc deleted file mode 100644 index ffe4b504c..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_subtract_aligned16.h> -#include <volk/volk_32f_subtract_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32f_subtract_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32f_subtract_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - float input0[vlen] __attribute__ ((aligned (16))); - float input1[vlen] __attribute__ ((aligned (16))); - - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); - } - printf("32f_subtract_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h deleted file mode 100644 index 97c14f129..000000000 --- a/volk/lib/qa_32f_subtract_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H -#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_subtract_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc deleted file mode 100644 index 494776357..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc +++ /dev/null @@ -1,142 +0,0 @@ -#include <volk/volk.h> -#include <qa_32f_sum_of_poly_aligned16.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <math.h> - -#define SNR 30.0 -#define CENTER -4.0 -#define CUTOFF -5.595 -#define ERR_DELTA (1e-4) -#define NUM_ITERS 100000 -#define VEC_LEN 64 -static float uniform() { - return ((float) rand() / RAND_MAX); // uniformly (0, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * -SNR/2.0; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32f_sum_of_poly_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32f_sum_of_poly_aligned16::t1(){ - int i = 0; - - volk_environment_init(); - int ret; - - const int vlen = VEC_LEN; - float cutoff = CUTOFF; - - float* center_point_array; - float* target; - float* target_generic; - float* src0 ; - - - ret = posix_memalign((void**)&center_point_array, 16, 24); - ret = posix_memalign((void**)&target, 16, 4); - ret = posix_memalign((void**)&target_generic, 16, 4); - ret = posix_memalign((void**)&src0, 16, (vlen << 2)); - - - random_floats((float*)src0, vlen); - - float a = (float)CENTER; - float etoa = expf(a); - center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 + - (-4.0 * a * a * a)/24.0 + - (3.0 * a * a)/6.0 + - (-2.0 * a)/2.0 + - (1.0)) * etoa; - center_point_array[1] = (//(-10.0 * a * a * a)/120.0 + - (6.0 * a * a)/24.0 + - (-3.0 * a)/6.0 + - (1.0/2.0)) * etoa; - center_point_array[2] = (//(10.0 * a * a)/120.0 + - (-4.0 * a)/24.0 + - (1.0/6.0)) * etoa; - center_point_array[3] = (//(-5.0 * a)/120.0 + - (1.0/24.0)) * etoa; - //center_point_array[4] = ((1.0)/120.0) * etoa; - center_point_array[4] = (//(a * a * a * a * a)/120.0 + - (a * a * a * a)/24.0 + - (a * a * a)/-6.0 + - (a * a)/2.0 + - -a + 1.0) * etoa; - - printf("32f_sum_of_poly_aligned16\n"); - - clock_t start, end; - double total; - - float my_sum = 0.0; - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - float sum = 0.0; - for(int l = 0; l < vlen; ++l) { - - sum += expf(src0[l]); - - } - my_sum = sum; - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("exp time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - - volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic"); - - } - - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32f_sum_of_poly_aligned16_manual(target, src0, 
center_point_array, &cutoff, vlen << 2, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 approx time: %f\n", total); - - - - printf("exp: %f, sse3: %f\n", my_sum, target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA); - - - free(center_point_array); - free(target); - free(target_generic); - free(src0); - - -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h deleted file mode 100644 index 67a347f9a..000000000 --- a/volk/lib/qa_32f_sum_of_poly_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H -#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc deleted file mode 100644 index 4eba0a3cd..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ /dev/null @@ -1,85 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_32f_multiply_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 
0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 -void qa_32fc_32f_multiply_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - float * taps; - int i; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float)); - - random_floats((float*)input, vlen * 2); - random_floats(taps, vlen); - - printf("32fc_32f_multiply_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_32f_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ - diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h deleted file mode 100644 index fc3b3eeb2..000000000 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc deleted file mode 100644 index 64ea65da9..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc +++ /dev/null @@ -1,83 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_32f_power_32fc_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1.5e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE -void qa_32fc_32f_power_32fc_aligned16::t1() { - - const int vlen = 2046; - const int ITERS = 10000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - int i; - - std::complex<float>* result_generic; - std::complex<float>* result_sse; - - 
ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float)); - - random_floats((float*)input, vlen * 2); - - const float power = 3.2; - - printf("32fc_32f_power_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA); - } - - free(input); - free(result_generic); - free(result_sse); - -} -#else -void qa_32fc_32f_power_32fc_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE */ - diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h deleted file mode 100644 index 464b7b7cc..000000000 --- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H -#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc deleted file mode 100644 index c55ab5aa0..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_atan2_32f_aligned16.h> -#include <volk/volk_32fc_atan2_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_atan2_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_atan2_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_atan2_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h deleted file mode 100644 index 9c4dc209a..000000000 --- a/volk/lib/qa_32fc_atan2_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc deleted file mode 100644 index 2f9a30395..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_conjugate_dot_prod_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse - -#if LV_HAVE_SSE && LV_HAVE_64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void 
qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#elif LV_HAVE_SSE && LV_HAVE_32 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform () * 32767; -} - - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - const int vlen = 789743; - - volk_environment_init(); - int ret; - - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result[0] = 
std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - - - volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32"); - - printf("32fc_conjugate_dot_prod_aligned16\n"); - printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0])); - - assertcomplexEqual(result_generic[0], result[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result); - -} - - -#else - -void qa_32fc_conjugate_dot_prod_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#endif /*LV_HAVE_SSE*/ diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h deleted file mode 100644 index 507b1769b..000000000 --- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 72e084c05..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_32f_aligned16.h> -#include <volk/volk_32fc_deinterleave_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void 
qa_32fc_deinterleave_32f_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h deleted file mode 100644 index 78660e6ad..000000000 --- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc deleted file mode 100644 index 89770c236..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_64f_aligned16.h> -#include <volk/volk_32fc_deinterleave_64f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_generic1[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - double output_sse21[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h deleted file mode 100644 index f924b9752..000000000 --- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 7472476f7..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_real_16s_aligned16.h> -#include <volk/volk_32fc_deinterleave_real_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 68b80f27d..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 5cbdc49b3..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_real_32f_aligned16.h> -#include <volk/volk_32fc_deinterleave_real_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 765450bb6..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc deleted file mode 100644 index 4147e30ae..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_deinterleave_real_64f_aligned16.h> -#include <volk/volk_32fc_deinterleave_real_64f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_deinterleave_real_64f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - double output_generic[vlen] __attribute__ ((aligned (16))); - double output_sse2[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_deinterleave_real_64f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h deleted file mode 100644 index 3e55fb812..000000000 --- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H -#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc deleted file mode 100644 index bcf9ea954..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.cc +++ /dev/null @@ -1,214 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_dot_prod_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> -#include <stdio.h> - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - - - -#if LV_HAVE_SSE3 -void qa_32fc_dot_prod_aligned16::t1() { - - const int vlen = 2046; - - 
volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result_sse3[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_32 -void qa_32fc_dot_prod_aligned16::t2() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result_sse3[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_32_time: %f\n", total); - - printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t2() { - printf("sse_32 not available... 
no test performed\n"); -} - -#endif - -#if LV_HAVE_SSE && LV_HAVE_64 - -void qa_32fc_dot_prod_aligned16::t3() { - - const int vlen = 2046; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen << 3); - ret = posix_memalign((void**)&taps, 16, vlen << 3); - ret = posix_memalign((void**)&result_generic, 16, 8); - ret = posix_memalign((void**)&result_sse3, 16, 8); - - - result_generic[0] = std::complex<float>(0,0); - result_sse3[0] = std::complex<float>(0,0); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_dot_prod_aligned16\n"); - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8, "generic"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64"); - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_64_time: %f\n", total); - - printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0])); - - - assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA); - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} - -#else -void qa_32fc_dot_prod_aligned16::t3() { - printf("sse_64 not available... 
no test performed\n"); -} - - - -#endif diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h deleted file mode 100644 index 4b360db27..000000000 --- a/volk/lib/qa_32fc_dot_prod_aligned16.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H -#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); - void t2 (); - void t3 (); -}; - - -#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc deleted file mode 100644 index 16984e30d..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_magnitude_16s_aligned16.h> -#include <volk/volk_32fc_magnitude_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_16s_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_16s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse[vlen] __attribute__ ((aligned (16))); - int16_t output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_magnitude_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h deleted file mode 100644 index ffdf1dd9e..000000000 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc deleted file mode 100644 index b99f1ddcf..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_magnitude_32f_aligned16.h> -#include <volk/volk_32fc_magnitude_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_magnitude_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_magnitude_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - printf("32fc_magnitude_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); - } -} - -#endif diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h deleted file mode 100644 index a2881308c..000000000 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc deleted file mode 100644 index e1f7eab3d..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ /dev/null @@ -1,86 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_32fc_multiply_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - - - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-3) - -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 -void qa_32fc_multiply_aligned16::t1() 
{ - - const int vlen = 2046; - const int ITERS = 100000; - - int i; - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<float>* input; - std::complex<float>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse3; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); - - random_floats((float*)input, vlen * 2); - random_floats((float*)taps, vlen * 2); - - printf("32fc_multiply_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_multiply_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#endif /* LV_HAVE_SSE3 */ diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h deleted file mode 100644 index c8abaa8fe..000000000 --- a/volk/lib/qa_32fc_multiply_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H -#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_multiply_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc deleted file mode 100644 index 1444c78a9..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_power_spectrum_32f_aligned16.h> -#include <volk/volk_32fc_power_spectrum_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse3 - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - printf("sse3 not available... 
no test performed\n"); -} - -#else - -void qa_32fc_power_spectrum_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); - - const float scalar = vlen; - - float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { - inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); - } - - printf("32fc_power_spectrum_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse33... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); - } -} - -#endif diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h deleted file mode 100644 index d991223f3..000000000 --- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H -#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc deleted file mode 100644 index d9ead8495..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_square_dist_aligned16.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> - -#define ERR_DELTA (1e-4) -#define NUM_ITERS 10000000 -#define VEC_LEN 64 -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex<float>* src0 ; - std::complex<float>* points; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - - printf("32fc_square_dist_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(; i < vlen; ++i) { - //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h deleted file mode 100644 index 9d365d8b0..000000000 --- a/volk/lib/qa_32fc_square_dist_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H -#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE 
(qa_32fc_square_dist_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc deleted file mode 100644 index f923d1d5c..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include <volk/volk.h> -#include <qa_32fc_square_dist_scalar_mult_aligned16.h> -#include <stdio.h> -#include <stdlib.h> -#include <math.h> -#include <time.h> - -#define ERR_DELTA .0001 -#define NUM_ITERS 10000000 -#define VEC_LEN 64 - -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - unsigned int i = 0; - for (; i < n; i++) { - - buf[i] = uniform () * 32767; - - } -} - - -#ifndef LV_HAVE_SSE3 - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - printf("sse3 not available... 
no test performed\n"); -} - -#else - - -void qa_32fc_square_dist_scalar_mult_aligned16::t1(){ - int i = 0; - - const int vlen = VEC_LEN; - - volk_environment_init(); - int ret; - - float* target; - float* target_generic; - std::complex<float>* src0 ; - std::complex<float>* points; - float scalar; - - ret = posix_memalign((void**)&points, 16, vlen << 3); - ret = posix_memalign((void**)&target, 16, vlen << 2); - ret = posix_memalign((void**)&target_generic, 16, vlen << 2); - ret = posix_memalign((void**)&src0, 16, 8); - - random_floats((float*)points, vlen * 2); - random_floats((float*)src0, 2); - random_floats(&scalar, 1); - - printf("32fc_square_dist_scalar_mult_aligned16\n"); - - clock_t start, end; - double total; - - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic time: %f\n", total); - - start = clock(); - for(int k = 0; k < NUM_ITERS; ++k) { - volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 3, "sse3"); - } - - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3 time: %f\n", total); - - - - for(i = 0; i < vlen; ++i) { - printf("generic: %f, sse3: %f\n", target_generic[i], target[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA); - } - - free(target); - free(target_generic); - free(points); - free(src0); -} - -#endif /*LV_HAVE_SSE3*/ diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h deleted file mode 100644 index ac4e3c45b..000000000 --- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H -#define 
INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc deleted file mode 100644 index 661801709..000000000 --- a/volk/lib/qa_32s_and_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_and_aligned16.h> -#include <volk/volk_32s_and_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_and_aligned16::t1() { - printf("sse not available... no test performed\n"); -} - -#else - -void qa_32s_and_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_and_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 
1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h deleted file mode 100644 index dfcb47c63..000000000 --- a/volk/lib/qa_32s_and_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H -#define INCLUDED_QA_32S_AND_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_and_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_and_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc deleted file mode 100644 index 07d799809..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_convert_32f_aligned16.h> -#include <volk/volk_32s_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h deleted file mode 100644 index efd2a2eea..000000000 --- a/volk/lib/qa_32s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc deleted file mode 100644 index 2ec610ffb..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_convert_32f_unaligned16.h> -#include <volk/volk_32s_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_32s_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32s_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - int32_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0)); - } - printf("32s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h deleted file mode 100644 index 5006f5fd8..000000000 --- a/volk/lib/qa_32s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc deleted file mode 100644 index 9da2ae344..000000000 --- a/volk/lib/qa_32s_or_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_32s_or_aligned16.h> -#include <volk/volk_32s_or_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE - -void qa_32s_or_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_32s_or_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int32_t input0[vlen] __attribute__ ((aligned (16))); - int32_t input1[vlen] __attribute__ ((aligned (16))); - - int32_t output0[vlen] __attribute__ ((aligned (16))); - int32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); - input1[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } - printf("32s_or_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h deleted file mode 100644 index 9e949eb52..000000000 --- a/volk/lib/qa_32s_or_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H -#define INCLUDED_QA_32S_OR_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32s_or_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32s_or_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */ diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc deleted file mode 100644 index 313c786b6..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include <volk/volk.h> -#include <qa_32u_byteswap_aligned16.h> -#include <volk/volk_32u_byteswap_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_32u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_32u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint32_t output0[vlen] __attribute__ ((aligned (16))); - uint32_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint32_t)); - printf("32u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h deleted file mode 100644 index 47bad4c3d..000000000 --- a/volk/lib/qa_32u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_32u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc deleted file mode 100644 index 7f9c4584a..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_convert_32f_aligned16.h> -#include <volk/volk_64f_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_aligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h deleted file mode 100644 index 95d79f73d..000000000 --- a/volk/lib/qa_64f_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc deleted file mode 100644 index 98aadbf4d..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_convert_32f_unaligned16.h> -#include <volk/volk_64f_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse2 - -#ifndef LV_HAVE_SSE2 - -void qa_64f_convert_32f_unaligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_convert_32f_unaligned16::t1() { - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - - double input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse2[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h deleted file mode 100644 index 430327e81..000000000 --- a/volk/lib/qa_64f_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc deleted file mode 100644 index 76e755514..000000000 --- a/volk/lib/qa_64f_max_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_max_aligned16.h> -#include <volk/volk_64f_max_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_max_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_max_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_max_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h deleted file mode 100644 index 7cbd4d4c1..000000000 --- a/volk/lib/qa_64f_max_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H -#define INCLUDED_QA_64F_MAX_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_max_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_max_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */ diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc deleted file mode 100644 index 4b70d2881..000000000 --- a/volk/lib/qa_64f_min_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_64f_min_aligned16.h> -#include <volk/volk_64f_min_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64f_min_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64f_min_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - double input0[vlen] __attribute__ ((aligned (16))); - double input1[vlen] __attribute__ ((aligned (16))); - - double output0[vlen] __attribute__ ((aligned (16))); - double output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2)); - } - printf("64f_min_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h deleted file mode 100644 index a0e95395f..000000000 --- a/volk/lib/qa_64f_min_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H -#define INCLUDED_QA_64F_MIN_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64f_min_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64f_min_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */ diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc deleted file mode 100644 index 20d012c9e..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include <volk/volk.h> -#include <qa_64u_byteswap_aligned16.h> -#include <volk/volk_64u_byteswap_aligned16.h> -#include <cstdlib> -#include <cstring> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE2 - -void qa_64u_byteswap_aligned16::t1() { - printf("sse2 not available... 
no test performed\n"); -} - -#else - -void qa_64u_byteswap_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100001; - - uint64_t output0[vlen] __attribute__ ((aligned (16))); - uint64_t output01[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } - memcpy(output01, output0, vlen*sizeof(uint64_t)); - printf("64u_byteswap_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse2_time: %f\n", total); - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); - } -} - -#endif diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h deleted file mode 100644 index a4fa0c983..000000000 --- a/volk/lib/qa_64u_byteswap_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H -#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_64u_byteswap_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc deleted file mode 100644 index 8dd5f76ca..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_16s_aligned16.h> -#include <volk/volk_8s_convert_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h deleted file mode 100644 index 38739fc96..000000000 --- a/volk/lib/qa_8s_convert_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc deleted file mode 100644 index 12c502d4b..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_16s_unaligned16.h> -#include <volk/volk_8s_convert_16s_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4_1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_16s_unaligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_16s_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_16s_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h deleted file mode 100644 index d39fffc35..000000000 --- a/volk/lib/qa_8s_convert_16s_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc deleted file mode 100644 index 672f5662f..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_32f_aligned16.h> -#include <volk/volk_8s_convert_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_aligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h deleted file mode 100644 index 7f8401d42..000000000 --- a/volk/lib/qa_8s_convert_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc deleted file mode 100644 index 43468b1b1..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8s_convert_32f_unaligned16.h> -#include <volk/volk_8s_convert_32f_unaligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse4.1 - -#ifndef LV_HAVE_SSE4_1 - -void qa_8s_convert_32f_unaligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8s_convert_32f_unaligned16::t1() { - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - int8_t input0[vlen+1] __attribute__ ((aligned (16))); - - float output_generic[vlen+1] __attribute__ ((aligned (16))); - float output_sse4_1[vlen+1] __attribute__ ((aligned (16))); - - for(int i = 0; i < vlen; ++i) { - input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } - printf("8s_convert_32f_unaligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%e...%e\n", output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h deleted file mode 100644 index aad2f8c22..000000000 --- a/volk/lib/qa_8s_convert_32f_unaligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H -#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc deleted file mode 100644 index 94e63e37d..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_deinterleave_16s_aligned16.h> -#include <volk/volk_8sc_deinterleave_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_16s_aligned16::t1() { - printf("sse4_1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_generic1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_11[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse4_11[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h deleted file mode 100644 index 9c99fed70..000000000 --- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc deleted file mode 100644 index 29073eed7..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc +++ /dev/null @@ -1,135 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_deinterleave_32f_aligned16.h> -#include <volk/volk_8sc_deinterleave_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_generic1[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - float output_sse1[vlen] __attribute__ ((aligned (16))); - float output_sse4_1[vlen] __attribute__ ((aligned (16))); - float output_sse14_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen); - } - end = clock(); - total = 
(double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < vlen; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max<double>((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max<double>((output_generic[i])*1e-4, 1e-4)); - - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4)); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4)); - } -} - - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h deleted file mode 100644 index 63b5fdadb..000000000 --- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc deleted file mode 100644 index 4980c982a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include 
<qa_8sc_deinterleave_real_16s_aligned16.h> -#include <volk/volk_8sc_deinterleave_real_16s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - printf("sse4_1 not available... no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_16s_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - int16_t output_generic[vlen] __attribute__ ((aligned (16))); - int16_t output_sse4_1[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_16s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4.1_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h deleted file mode 100644 index 02050926f..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc deleted file mode 100644 index 3c3f737a1..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc +++ /dev/null @@ -1,139 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_deinterleave_real_32f_aligned16.h> -#include <volk/volk_8sc_deinterleave_real_32f_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSE4_1 - -#ifndef LV_HAVE_SSE - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - printf("sse not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - } -} - -#endif /* LV_HAVE_SSE */ - -#else - -void qa_8sc_deinterleave_real_32f_aligned16::t1() { - - - volk_runtime_init(); - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> *input0; - - float* output_generic; - float* output_sse; - float* output_sse4_1; - - ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t)); - ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float)); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0); - } - - printf("8sc_deinterleave_real_32f_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, 
%d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); - CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); - } - - free(input0); - free(output_generic); - free(output_sse); - free(output_sse4_1); -} - -#endif /* LV_HAVE_SSE4_1 */ diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h deleted file mode 100644 index 93338e488..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc deleted file mode 100644 index a33d1bf30..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include <volk/volk.h> -#include <qa_8sc_deinterleave_real_8s_aligned16.h> -#include <volk/volk_8sc_deinterleave_real_8s_aligned16.h> -#include <cstdlib> -#include <ctime> - -//test for sse - -#ifndef LV_HAVE_SSSE3 - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - printf("ssse3 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_deinterleave_real_8s_aligned16::t1() { - - volk_environment_init(); - clock_t start, end; - double total; - const int vlen = 3201; - const int ITERS = 100000; - std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16))); - - int8_t output_generic[vlen] __attribute__ ((aligned (16))); - int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); - - int8_t* loadInput = (int8_t*)input0; - for(int i = 0; i < vlen*2; ++i) { - loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - printf("8sc_deinterleave_real_8s_aligned\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("ssse3_time: %f\n", total); - - for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); - } - - for(int i = 0; i < vlen; ++i) { - //printf("%d...%d\n", output0[i], output01[i]); - CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); - } -} - -#endif diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h deleted file mode 100644 index 92fc0dd4a..000000000 --- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H -#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc deleted file mode 100644 index 216bf1cef..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_multiply_conjugate_16sc_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <ctime> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_16sc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<int8_t>* input; - std::complex<int8_t>* taps; - - std::complex<int16_t>* result_generic; - std::complex<int16_t>* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t)); - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_16sc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex<int16_t>*)result_generic, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex<int16_t>*)result_sse4_1, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), 
std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i])); - - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h deleted file mode 100644 index 0e78a5eca..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc deleted file mode 100644 index 4c707446e..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include <volk/volk_runtime.h> -#include <volk/volk.h> -#include <qa_8sc_multiply_conjugate_32fc_aligned16.h> -#include <stdlib.h> -#include <math.h> -#include <ctime> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); - -#define ERR_DELTA (1e-4) - -#ifndef LV_HAVE_SSE4_1 - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - printf("sse4.1 not available... 
no test performed\n"); -} - -#else - -void qa_8sc_multiply_conjugate_32fc_aligned16::t1() { - - - volk_runtime_init(); - - const int vlen = 2046; - const int ITERS = 100000; - - volk_environment_init(); - int ret; - clock_t start, end; - double total; - std::complex<int8_t>* input; - std::complex<int8_t>* taps; - - std::complex<float>* result_generic; - std::complex<float>* result_sse4_1; - int i; - int8_t* inputInt8_T; - int8_t* tapsInt8_T; - - ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t)); - ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); - ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float)); - - - inputInt8_T = (int8_t*)input; - tapsInt8_T = (int8_t*)taps; - for(int i = 0; i < vlen*2; ++i) { - inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0)); - } - - printf("8sc_multiply_conjugate_32fc_aligned16\n"); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse4_1_time: %f\n", total); - - for(i = 0; i < vlen; i++){ - //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), 
std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i])); - assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA); - } - - free(input); - free(taps); - free(result_generic); - free(result_sse4_1); - -} - -#endif /*LV_HAVE_SSE4_1*/ diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h deleted file mode 100644 index eb9ae309c..000000000 --- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H -#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H - -#include <cppunit/extensions/HelperMacros.h> -#include <cppunit/TestCase.h> - -class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase { - - CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16); - CPPUNIT_TEST (t1); - CPPUNIT_TEST_SUITE_END (); - - private: - void t1 (); -}; - - -#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */ diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc new file mode 100644 index 000000000..e85e2c1bc --- /dev/null +++ b/volk/lib/qa_utils.cc @@ -0,0 +1,447 @@ +#include "qa_utils.h" +#include <stdlib.h> +#include <boost/foreach.hpp> +#include <boost/assign/list_of.hpp> +#include <boost/tokenizer.hpp> +//#include <boost/test/unit_test.hpp> +#include <iostream> +#include <vector> +#include <time.h> +#include <math.h> +#include <boost/lexical_cast.hpp> +//#include <volk/volk_runtime.h> +#include <volk/volk_registry.h> +#include <volk/volk.h> +#include <boost/typeof/typeof.hpp> +#include <boost/type_traits.hpp> + +float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +template <class t> +void random_floats (t *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +void load_random_data(void *data, volk_type_t type, unsigned int n) { + if(type.is_complex) 
n *= 2; + if(type.is_float) { + if(type.size == 8) random_floats<double>((double *)data, n); + else random_floats<float>((float *)data, n); + } else { + float int_max = float(uint64_t(2) << (type.size*8)); + if(type.is_signed) int_max /= 2.0; + for(int i=0; i<n; i++) { + float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max; + //man i really don't know how to do this in a more clever way, you have to cast down at some point + switch(type.size) { + case 8: + if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; + else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; + break; + case 4: + if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; + else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; + break; + case 2: + if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; + else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; + break; + case 1: + if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; + else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; + break; + default: + throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here + } + } + } +} + +void *make_aligned_buffer(unsigned int len, unsigned int size) { + void *buf; + int ret; + ret = posix_memalign((void**)&buf, 16, len * size); + assert(ret == 0); + memset(buf, 0x00, len*size); + return buf; +} + +void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) { + BOOST_FOREACH(volk_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 
2 : 1))); + } +} + +static std::vector<std::string> get_arch_list(const int archs[]) { + std::vector<std::string> archlist; + int num_archs = archs[0]; + + //there has got to be a way to query these arches + for(int i = 0; i < num_archs; i++) { + switch(archs[i+1]) { + case (1<<LV_GENERIC): + archlist.push_back("generic"); + break; + case (1<<LV_ORC): + archlist.push_back("orc"); + break; + case (1<<LV_SSE): + archlist.push_back("sse"); + break; + case (1<<LV_SSE2): + archlist.push_back("sse2"); + break; + case (1<<LV_SSE3): + archlist.push_back("sse3"); + break; + case (1<<LV_SSSE3): + archlist.push_back("ssse3"); + break; + case (1<<LV_SSE4_1): + archlist.push_back("sse4_1"); + break; + case (1<<LV_SSE4_2): + archlist.push_back("sse4_2"); + break; + case (1<<LV_SSE4_A): + archlist.push_back("sse4_a"); + break; + case (1<<LV_MMX): + archlist.push_back("mmx"); + break; + case (1<<LV_AVX): + archlist.push_back("avx"); + break; + default: + break; + } + } + return archlist; +} + +volk_type_t volk_type_from_string(std::string name) { + volk_type_t type; + type.is_float = false; + type.is_scalar = false; + type.is_complex = false; + type.is_signed = false; + type.size = 0; + type.str = name; + + if(name.size() < 2) throw std::string("name too short to be a datatype"); + + //is it a scalar? 
+ if(name[0] == 's') { + type.is_scalar = true; + name = name.substr(1, name.size()-1); + } + + //get the data size + int last_size_pos = name.find_last_of("0123456789"); + if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); + //will throw if malformed + int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1)); + + assert(((size % 8) == 0) && (size <= 64) && (size != 0)); + type.size = size/8; //in bytes + + for(int i=last_size_pos+1; i < name.size(); i++) { + switch (name[i]) { + case 'f': + type.is_float = true; + break; + case 'i': + type.is_signed = true; + break; + case 'c': + type.is_complex = true; + break; + case 'u': + type.is_signed = false; + break; + default: + throw; + } + } + + return type; +} + +static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, + std::vector<volk_type_t> &outputsig, + std::string name) { + boost::char_separator<char> sep("_"); + boost::tokenizer<boost::char_separator<char> > tok(name, sep); + std::vector<std::string> toked; + tok.assign(name); + toked.assign(tok.begin(), tok.end()); + + assert(toked[0] == "volk"); + toked.erase(toked.begin()); + + //ok. we're assuming a string in the form + //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) + + enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; + std::string fn_name; + volk_type_t type; + BOOST_FOREACH(std::string token, toked) { + try { + type = volk_type_from_string(token); + if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... 
+ + if(side == SIDE_INPUT) inputsig.push_back(type); + else outputsig.push_back(type); + } catch (...){ + if(token[0] == 'x') { //it's a multiplier + if(side == SIDE_INPUT) assert(inputsig.size() > 0); + else assert(outputsig.size() > 0); + int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid + for(int i=1; i<multiplier; i++) { + if(side == SIDE_INPUT) inputsig.push_back(inputsig.back()); + else outputsig.push_back(outputsig.back()); + } + } + else if(side == SIDE_INPUT) { //it's the function name, at least it better be + side = SIDE_NAME; + fn_name.append("_"); + fn_name.append(token); + } + else if(side == SIDE_OUTPUT) { + if(token != toked.back()) throw; //the last token in the name is the alignment + } + } + } + //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input! + assert(inputsig.size() != 0); +} + +inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], vlen, arch.c_str()); +} + +inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str()); +} + +inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); +} + +inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); +} + +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); +} + +inline void 
run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + +template <class t> +bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; + for(int i=0; i<vlen; i++) { + if(((t *)(in1))[i] < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision + if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; + } + } + } + + return fail; +} + +template <class t> +bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { + bool fail = false; + int print_max_errs = 10; + for(int i=0; i<vlen; i++) { + if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl; + } + } + } + + return fail; +} + +bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { + std::cout << "RUN_VOLK_TESTS: " << name << std::endl; + + //first let's get a list of available architectures for the test + std::vector<std::string> arch_list = get_arch_list(archs); + + if(arch_list.size() < 2) { + std::cout << "no architectures to test" << std::endl; + return false; + } + + //now we have to get a function signature by parsing the name + std::vector<volk_type_t> inputsig, outputsig; + 
get_signatures_from_name(inputsig, outputsig, name); + + //pull the input scalars into their own vector + std::vector<volk_type_t> inputsc; + for(int i=0; i<inputsig.size(); i++) { + if(inputsig[i].is_scalar) { + inputsc.push_back(inputsig[i]); + inputsig.erase(inputsig.begin() + i); + } + } + + //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl; + //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl; + std::vector<void *> inbuffs; + std::vector<void *> free_buffs; //this is just a list of void*'s that i'll have to free later. + //we need it because we dupe void*s in test_data below. + make_buffer_for_signature(inbuffs, inputsig, vlen); + for(int i=0; i<inbuffs.size(); i++) { + load_random_data(inbuffs[i], inputsig[i], vlen); + free_buffs.push_back(inbuffs[i]); + } + + //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch + std::vector<std::vector<void *> > test_data; + for(int i=0; i<arch_list.size(); i++) { + std::vector<void *> arch_buffs; + for(int j=0; j<outputsig.size(); j++) { + arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 
2 : 1))); + free_buffs.push_back(arch_buffs.back()); + } + for(int j=0; j<inputsig.size(); j++) { + arch_buffs.push_back(inbuffs[j]); + } + test_data.push_back(arch_buffs); + } + + std::vector<volk_type_t> both_sigs; + both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); + both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); + + //now run the test + clock_t start, end; + for(int i = 0; i < arch_list.size(); i++) { + start = clock(); + + switch(both_sigs.size()) { + case 1: + if(inputsc.size() == 0) { + run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else throw "unsupported 1 arg function >1 scalars"; + break; + case 2: + if(inputsc.size() == 0) { + run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else throw "unsupported 2 arg function >1 scalars"; + break; + case 3: + if(inputsc.size() == 0) { + run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else throw "unsupported 3 arg function >1 scalars"; + break; + case 4: + run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + break; + default: + throw "no function handler for this signature"; + break; + } + + end = clock(); + std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + } + //and now compare each output to the generic output + //first we have to know which output is the generic one, they 
aren't in order... + int generic_offset=0; + for(int i=0; i<arch_list.size(); i++) + if(arch_list[i] == "generic") generic_offset=i; + + //now compare + //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know + + bool fail = false; + bool fail_global = false; + for(int i=0; i<arch_list.size(); i++) { + if(i != generic_offset) { + for(int j=0; j<both_sigs.size(); j++) { + if(both_sigs[j].is_float) { + if(both_sigs[j].size == 8) { + fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + } else { + //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ + switch(both_sigs[j].size) { + case 8: + if(both_sigs[j].is_signed) { + fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + break; + case 4: + if(both_sigs[j].is_signed) { + fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + break; + case 2: + if(both_sigs[j].is_signed) { + fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 
2 : 1), tol); + } + break; + case 1: + if(both_sigs[j].is_signed) { + fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } else { + fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); + } + break; + default: + fail=1; + } + } + if(fail) { + fail_global = true; + std::cout << name << ": fail on arch " << arch_list[i] << std::endl; + } + //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1)); + } + } + } + + BOOST_FOREACH(void *buf, free_buffs) { + free(buf); + } + + return fail_global; +} + + diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h new file mode 100644 index 000000000..e2539060a --- /dev/null +++ b/volk/lib/qa_utils.h @@ -0,0 +1,33 @@ +#ifndef VOLK_QA_UTILS_H +#define VOLK_QA_UTILS_H + +#include <stdlib.h> +#include <string> + +struct volk_type_t { + bool is_float; + bool is_scalar; + bool is_signed; + bool is_complex; + int size; + std::string str; +}; + +volk_type_t volk_type_from_string(std::string); + +float uniform(void); +void random_floats(float *buf, unsigned n); + +bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); + +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) + +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place +typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); +typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input +typedef void (*volk_fn_2arg_s32f)(void *, 
void *, float, unsigned int, const char*); +typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*); + +#endif //VOLK_QA_UTILS_H diff --git a/volk/lib/qa_volk.h b/volk/lib/qa_volk.h deleted file mode 100644 index 43fa7faba..000000000 --- a/volk/lib/qa_volk.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU Example Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Example Public License for more details. - * - * You should have received a copy of the GNU Example Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#ifndef INCLUDED_QA_VOLK_H -#define INCLUDED_QA_VOLK_H - -#include <cppunit/TestSuite.h> - -//! collect all the tests for the example directory - -class qa_volk { - public: - //! return suite of tests for all of example directory - static CppUnit::TestSuite *suite (); -}; - -#endif /* INCLUDED_QA_VOLK_H */ diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc deleted file mode 100644 index 50ac08eab..000000000 --- a/volk/lib/test_all.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2002,2008 Free Software Foundation, Inc. 
- * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Radio; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#include <cppunit/ui/text/TestRunner.h> -#include <cppunit/TextTestRunner.h> - -#include <qa_volk.h> - -#include <cppunit/XmlOutputter.h> -#include <iostream> -#include <getopt.h> -#include <stdlib.h> -#include <stdio.h> -#include <string> -#include <fstream> - -int -main (int argc, char **argv) -{ - - int opt = 0; - std::string xmlOutputFile(""); - - while( (opt = getopt(argc, argv, "o:")) != -1){ - switch(opt){ - case 'o': - if(optarg){ - xmlOutputFile.assign(optarg); - } - else{ - std::cerr << "No xml file output specified for -o" << std::endl; - exit(EXIT_FAILURE); - } - break; - - default: /* '?' */ - fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n", - argv[0]); - exit(EXIT_FAILURE); - } - - } - - CppUnit::TextUi::TestRunner runner; - - runner.addTest (qa_volk::suite ()); - - bool was_successful = false; - if(!xmlOutputFile.empty()){ - std::ofstream xmlOutput(xmlOutputFile.c_str()); - if(xmlOutput.is_open()){ - runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput)); - - was_successful = runner.run("", false, true, false); - } - xmlOutput.close(); - } - else{ - was_successful = runner.run ("", false); - } - - return was_successful ? 
0 : 1; -} diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc new file mode 100644 index 000000000..f33670856 --- /dev/null +++ b/volk/lib/testqa.cc @@ -0,0 +1,99 @@ +#include "qa_utils.h" +#include <volk/volk.h> +#include <volk/volk_registry.h> +#include <boost/test/unit_test.hpp> + +BOOST_AUTO_TEST_CASE(volk_test_all) { + //in order... +// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); + VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); +// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); + 
VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); +// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); +// 
VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + +} diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am new file mode 100644 index 000000000..6b5e4f8b6 --- /dev/null +++ b/volk/orc/Makefile.am @@ -0,0 +1,56 @@ +# +# Copyright 2008 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# + +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(ORC_CFLAGS) + +include $(top_srcdir)/Makefile.common +lib_LTLIBRARIES = libvolk_orc.la +libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS) + +libvolk_orc_la_SOURCES = \ +volk_8i_convert_16i_a16_orc_impl.orc \ +volk_8i_s32f_convert_32f_a16_orc_impl.orc \ +volk_16u_byteswap_a16_orc_impl.orc \ +volk_32i_x2_and_32i_a16_orc_impl.orc \ +volk_32i_x2_or_32i_a16_orc_impl.orc \ +volk_32f_x2_add_32f_a16_orc_impl.orc \ +volk_32f_x2_subtract_32f_a16_orc_impl.orc \ +volk_32f_x2_divide_32f_a16_orc_impl.orc \ +volk_32f_x2_multiply_32f_a16_orc_impl.orc \ +volk_32fc_x2_multiply_32fc_a16_orc_impl.orc \ +volk_32fc_32f_multiply_32fc_a16_orc_impl.orc \ +volk_32f_sqrt_32f_a16_orc_impl.orc \ +volk_32f_x2_max_32f_a16_orc_impl.orc \ +volk_32f_x2_min_32f_a16_orc_impl.orc \ +volk_32f_s32f_normalize_a16_orc_impl.orc \ +volk_32fc_magnitude_32f_a16_orc_impl.orc \ +volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc \ +volk_16ic_magnitude_16i_a16_orc_impl.orc \ +volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc \ +volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc \ +volk_16ic_deinterleave_real_8i_a16_orc_impl.orc + + + + +my_ORCC_FLAGS = --implementation $(ORCC_FLAGS) + +.orc.c: + $(ORCC) $(my_ORCC_FLAGS) -o $@ $< diff --git a/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc new file mode 100644 index 000000000..0189fbf5d --- /dev/null +++ b/volk/orc/volk_16i_s32f_deinterleave_32f_x2_a16_orc_impl.orc @@ -0,0 +1,12 @@ +.function volk_16ic_s32f_deinterleave_32f_x2_a16_orc_impl +.dest 4 idst +.dest 4 qdst +.source 4 src +.floatparam 4 scalar +.temp 8 iql +.temp 8 iqf + +x2 convswl iql, src +x2 convlf iqf, iql +x2 divf iqf, iqf, scalar +splitql qdst, idst, iqf diff --git a/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc new file mode 100644 index 000000000..56018edda --- /dev/null +++ 
b/volk/orc/volk_16ic_deinterleave_16i_x2_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_16ic_deinterleave_16i_x2_a16_orc_impl +.dest 2 idst +.dest 2 qdst +.source 4 src +splitlw qdst, idst, src diff --git a/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc b/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc new file mode 100644 index 000000000..dba9a4c8e --- /dev/null +++ b/volk/orc/volk_16ic_deinterleave_real_8i_a16_orc_impl.orc @@ -0,0 +1,6 @@ +.function volk_16ic_deinterleave_real_8i_a16_orc_impl +.dest 1 dst +.source 4 src +.temp 2 iw +select0lw iw, src +convhwb dst, iw diff --git a/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc new file mode 100644 index 000000000..37225e9b8 --- /dev/null +++ b/volk/orc/volk_16ic_magnitude_16i_a16_orc_impl.orc @@ -0,0 +1,23 @@ +.function volk_16ic_magnitude_16i_a16_orc_impl +.source 4 src +.dest 2 dst +.floatparam 4 scalar +.temp 8 iql +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if +.temp 4 sumf +.temp 4 rootf +.temp 4 rootl + +x2 convswl iql, src +x2 convlf iqf, iql +x2 divf iqf, iqf, scalar +x2 mulf prodiqf, iqf, iqf +splitql qf, if, prodiqf +addf sumf, if, qf +sqrtf rootf, sumf +mulf rootf, rootf, scalar +convfl rootl, rootf +convlw dst, rootl diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..1e2380837 --- /dev/null +++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc @@ -0,0 +1,25 @@ +.function volk_16ic_magnitude_32f_a16_orc_impl +.source 4 src +.dest 4 dst +.floatparam 4 scalar +.temp 4 reall +.temp 4 imagl +.temp 2 reals +.temp 2 imags +.temp 4 realf +.temp 4 imagf +.temp 4 sumf + + + +splitlw reals, imags, src +convswl reall, reals +convswl imagl, imags +convlf realf, reall +convlf imagf, imagl +divf realf, realf, scalar +divf imagf, imagf, scalar +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, 
imagf +sqrtf dst, sumf diff --git a/volk/orc/volk_16u_byteswap_a16_orc_impl.orc b/volk/orc/volk_16u_byteswap_a16_orc_impl.orc new file mode 100644 index 000000000..c1c8ee59e --- /dev/null +++ b/volk/orc/volk_16u_byteswap_a16_orc_impl.orc @@ -0,0 +1,3 @@ +.function volk_16u_byteswap_a16_orc_impl +.dest 2 dst +swapw dst, dst diff --git a/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc b/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc new file mode 100644 index 000000000..acd319b16 --- /dev/null +++ b/volk/orc/volk_32f_s32f_normalize_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_s32f_normalize_a16_orc_impl +.source 4 src1 +.floatparam 4 invscalar +.dest 4 dst +mulf dst, src1, invscalar diff --git a/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc b/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc new file mode 100644 index 000000000..ae5680f15 --- /dev/null +++ b/volk/orc/volk_32f_sqrt_32f_a16_orc_impl.orc @@ -0,0 +1,4 @@ +.function volk_32f_sqrt_32f_a16_orc_impl +.source 4 src +.dest 4 dst +sqrtf dst, src diff --git a/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc new file mode 100644 index 000000000..8d095a052 --- /dev/null +++ b/volk/orc/volk_32f_x2_add_32f_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_x2_add_32f_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +addf dst, src1, src2 diff --git a/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc new file mode 100644 index 000000000..0097646cb --- /dev/null +++ b/volk/orc/volk_32f_x2_divide_32f_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_x2_divide_32f_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +divf dst, src1, src2 diff --git a/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc new file mode 100644 index 000000000..b7f008737 --- /dev/null +++ b/volk/orc/volk_32f_x2_max_32f_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function 
volk_32f_x2_max_32f_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +maxf dst, src1, src2 diff --git a/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc new file mode 100644 index 000000000..78328b576 --- /dev/null +++ b/volk/orc/volk_32f_x2_min_32f_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_x2_min_32f_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +minf dst, src1, src2 diff --git a/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc new file mode 100644 index 000000000..e8fadff19 --- /dev/null +++ b/volk/orc/volk_32f_x2_multiply_32f_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_x2_multiply_32f_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +mulf dst, src1, src2 diff --git a/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc new file mode 100644 index 000000000..13fbe8c83 --- /dev/null +++ b/volk/orc/volk_32f_x2_subtract_32f_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_x2_subtract_32f_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +subf dst, src1, src2 diff --git a/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc new file mode 100644 index 000000000..455293cff --- /dev/null +++ b/volk/orc/volk_32fc_32f_multiply_32fc_a16_orc_impl.orc @@ -0,0 +1,7 @@ +.function volk_32fc_32f_multiply_32fc_a16_orc_impl +.source 8 src1 +.source 4 src2 +.dest 8 dst +.temp 8 tmp +mergelq tmp, src2, src2 +x2 mulf dst, src1, tmp diff --git a/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc new file mode 100644 index 000000000..c5e2e57f1 --- /dev/null +++ b/volk/orc/volk_32fc_magnitude_32f_a16_orc_impl.orc @@ -0,0 +1,13 @@ +.function volk_32fc_magnitude_32f_a16_orc_impl +.source 8 src +.dest 4 dst +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if +.temp 4 sumf + 
+x2 mulf prodiqf, src, src +splitql qf, if, prodiqf +addf sumf, if, qf +sqrtf dst, sumf diff --git a/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc b/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc new file mode 100644 index 000000000..6116f5e1f --- /dev/null +++ b/volk/orc/volk_32fc_s32f_magnitude_16i_a16_orc_impl.orc @@ -0,0 +1,23 @@ +.function volk_32fc_s32f_magnitude_16i_a16_orc_impl +.source 8 src +.dest 2 dst +.floatparam 4 scalar +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if +.temp 4 sumf +.temp 4 rootf +.temp 4 rootl +.temp 4 maskl + +x2 mulf prodiqf, src, src +splitql qf, if, prodiqf +addf sumf, if, qf +sqrtf rootf, sumf +mulf rootf, rootf, scalar +cmpltf maskl, 32768.0, rootf +andl maskl, maskl, 0x80000000 +orl rootf, rootf, maskl +convfl rootl, rootf +convssslw dst, rootl diff --git a/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc b/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc new file mode 100644 index 000000000..a27d722cd --- /dev/null +++ b/volk/orc/volk_32fc_x2_multiply_32fc_a16_orc_impl.orc @@ -0,0 +1,18 @@ +.function volk_32fc_x2_multiply_32fc_a16_orc_impl +.source 8 src1 +.source 8 src2 +.dest 8 dst +.temp 8 iqprod +.temp 4 real +.temp 4 imag +.temp 4 ac +.temp 4 bd +.temp 8 swapped +x2 mulf iqprod, src1, src2 +splitql bd, ac, iqprod +subf real, ac, bd +swaplq swapped, src1 +x2 mulf iqprod, swapped, src2 +splitql bd, ac, iqprod +addf imag, ac, bd +mergelq dst, real, imag diff --git a/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc new file mode 100644 index 000000000..7b331f8ed --- /dev/null +++ b/volk/orc/volk_32i_x2_and_32i_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32i_x2_and_32i_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +andl dst, src1, src2 diff --git a/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc b/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc new file mode 100644 index 000000000..4984a9ced --- /dev/null +++ 
b/volk/orc/volk_32i_x2_or_32i_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32i_x2_or_32i_a16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +orl dst, src1, src2 diff --git a/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc b/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc new file mode 100644 index 000000000..f44845c88 --- /dev/null +++ b/volk/orc/volk_8i_convert_16i_a16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_8i_convert_16i_a16_orc_impl +.source 1 src +.dest 2 dst +convsbw dst, src +shlw dst, dst, 8 diff --git a/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc b/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc new file mode 100644 index 000000000..8f6e157e9 --- /dev/null +++ b/volk/orc/volk_8i_s32f_convert_32f_a16_orc_impl.orc @@ -0,0 +1,11 @@ +.function volk_8i_s32f_convert_32f_a16_orc_impl +.source 1 src +.dest 4 dst +.floatparam 4 scalar +.temp 4 flsrc +.temp 4 lsrc +.temp 2 ssrc +convsbw ssrc, src +convswl lsrc, ssrc +convlf flsrc, lsrc +mulf dst, flsrc, scalar diff --git a/volk/volk.pc.in b/volk/volk.pc.in index a24298856..b03dbdada 100644 --- a/volk/volk.pc.in +++ b/volk/volk.pc.in @@ -10,6 +10,6 @@ Name: volk Description: VOLK.. Vector Optimized Library of Kernels Requires: Version: @VERSION@ -Libs: -lvolk -lvolk_runtime +Libs: -lvolk -lvolk_runtime -lvolk_orc Cflags: -I${includedir} ${LV_CXXFLAGS} |