summaryrefslogtreecommitdiff
path: root/volk/lib
diff options
context:
space:
mode:
Diffstat (limited to 'volk/lib')
-rw-r--r--volk/lib/.gitignore21
-rw-r--r--volk/lib/CMakeLists.txt260
-rw-r--r--volk/lib/Makefile.am158
-rw-r--r--volk/lib/qa_16s_add_quad_aligned16.cc26
-rw-r--r--volk/lib/qa_16s_branch_4_state_8_aligned16.cc20
-rw-r--r--volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc18
-rw-r--r--volk/lib/qa_16s_quad_max_star_aligned16.cc12
-rw-r--r--volk/lib/qa_32f_fm_detect_aligned16.cc6
-rw-r--r--volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc6
-rw-r--r--volk/lib/qa_32u_popcnt_aligned16.cc6
-rw-r--r--volk/lib/qa_64u_popcnt_aligned16.cc6
-rw-r--r--volk/lib/qa_utils.cc94
-rw-r--r--volk/lib/qa_utils.h9
-rw-r--r--volk/lib/testqa.cc183
-rw-r--r--volk/lib/volk_prefs.c49
-rw-r--r--volk/lib/volk_rank_archs.c40
-rw-r--r--volk/lib/volk_rank_archs.h4
17 files changed, 535 insertions, 383 deletions
diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore
index 6a5fde28f..28ec6ddaa 100644
--- a/volk/lib/.gitignore
+++ b/volk/lib/.gitignore
@@ -1,23 +1,4 @@
-/*.cache
-/*.la
-/*.lo
-/*.pc
-/.deps
-/.la
-/.libs
-/.lo
/Makefile
/Makefile.in
-/volk.c
-/volk_cpu_generic.c
-/volk_cpu_powerpc.c
-/volk_cpu_x86.c
-/volk_environment_init.c
-/volk_init.c
-/volk_init.h
-/volk_mktables
-/volk_mktables.c
-/volk_proccpu_sim.c
-/volk_runtime.c
-/test_all
+/Makefile.am
/testqa
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
new file mode 100644
index 000000000..5dd41be0f
--- /dev/null
+++ b/volk/lib/CMakeLists.txt
@@ -0,0 +1,260 @@
+#
+# Copyright 2011 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# Parse the arches xml file:
+# Test each arch to see if the compiler supports the flag.
+# If the test passes append the arch to the available list.
+########################################################################
+#extract the arch lines from the xml file using crazy python
+EXECUTE_PROCESS(
+ COMMAND ${PYTHON_EXECUTABLE} -c
+ "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch')))"
+ OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+#This macro sets the ${arch}_flag variable,
+#and handles special cases for MSVC arch flags.
+MACRO(set_arch_flag name flag)
+ IF(MSVC AND ${name} STREQUAL "mmx")
+ SET(${name}_flag "/arch:SSE") #no /arch:MMX
+ ELSEIF(MSVC AND ${name} STREQUAL "sse")
+ SET(${name}_flag "/arch:SSE")
+ ELSEIF(MSVC AND ${name} STREQUAL "sse2")
+ SET(${name}_flag "/arch:SSE2")
+ ELSE()
+ SET(${name}_flag -${flag})
+ ENDIF()
+ENDMACRO(set_arch_flag)
+
+MACRO(handle_arch name flag)
+
+ #handle special case for none flag
+ IF(${flag} STREQUAL "none")
+ SET(have_${name} TRUE)
+
+ #otherwise test the flag against the compiler
+ ELSE()
+ INCLUDE(CheckCXXCompilerFlag)
+ set_arch_flag(${name} ${flag})
+ CHECK_CXX_COMPILER_FLAG(${${name}_flag} have_${name})
+ ENDIF()
+
+ IF(have_${name})
+ LIST(APPEND available_arches ${name})
+ ENDIF()
+ENDMACRO(handle_arch)
+
+#create a list of available arches
+FOREACH(arch_line ${arch_lines})
+ SEPARATE_ARGUMENTS(args UNIX_COMMAND "${arch_line}")
+ handle_arch(${args})
+ENDFOREACH(arch_line)
+
+MESSAGE(STATUS "Available arches: ${available_arches}")
+
+########################################################################
+# Parse the machines xml file:
+# Test each machine to see if its arch dependencies are supported.
+# Build a list of supported machines and the machine definitions.
+########################################################################
+#extract the machine lines from the xml file using crazy python
+EXECUTE_PROCESS(
+ COMMAND ${PYTHON_EXECUTABLE} -c
+ "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine')))"
+ OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+MACRO(handle_machine1 name)
+ UNSET(machine_flags)
+ STRING(TOUPPER LV_MACHINE_${name} machine_def)
+
+ #check if all the arches are supported
+ FOREACH(arch ${ARGN})
+ SET(is_match ${have_${arch}})
+ IF(NOT is_match)
+ SET(is_match FALSE)
+ BREAK()
+ ENDIF(NOT is_match)
+ SET(machine_flags "${machine_flags} ${${arch}_flag}")
+ ENDFOREACH(arch)
+
+ IF(is_match)
+ #this is a match, append the source and set its flags
+ SET(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c)
+ SET_SOURCE_FILES_PROPERTIES(${machine_source} PROPERTIES COMPILE_FLAGS ${machine_flags})
+ LIST(APPEND machine_sources ${machine_source})
+ LIST(APPEND machine_defs ${machine_def})
+ LIST(APPEND available_machines ${name})
+ ENDIF()
+ENDMACRO(handle_machine1)
+
+MACRO(handle_machine name)
+ SET(arches ${ARGN})
+ LIST(FIND arches "32|64" index)
+ IF(${index} EQUAL -1)
+ handle_machine1(${name} ${arches})
+ ELSE()
+ LIST(REMOVE_ITEM arches "32|64")
+ handle_machine1(${name}_32 32 ${arches})
+ handle_machine1(${name}_64 64 ${arches})
+ ENDIF()
+ENDMACRO(handle_machine)
+
+#setup the available machines
+FOREACH(machine_line ${machine_lines})
+ SEPARATE_ARGUMENTS(args UNIX_COMMAND "${machine_line}")
+ handle_machine(${args})
+ENDFOREACH(machine_line)
+
+MESSAGE(STATUS "Available machines: ${available_machines}")
+
+########################################################################
+# Create rules to run the volk generator
+########################################################################
+#list of the generated sources
+SET(volk_gen_sources
+ ${CMAKE_BINARY_DIR}/include/volk/volk.h
+ ${CMAKE_BINARY_DIR}/lib/volk.c
+ ${CMAKE_BINARY_DIR}/lib/volk_init.h
+ ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h
+ ${CMAKE_BINARY_DIR}/lib/volk_cpu.c
+ ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h
+ ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c
+ ${CMAKE_BINARY_DIR}/lib/volk_environment_init.h
+ ${CMAKE_BINARY_DIR}/lib/volk_machines.h
+ ${CMAKE_BINARY_DIR}/lib/volk_machines.c
+ ${machine_sources}
+)
+
+#dependencies are all python, xml, and header implementation files
+FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
+FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
+FILE(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h)
+
+ADD_CUSTOM_COMMAND(
+ OUTPUT ${volk_gen_sources}
+ DEPENDS ${xml_files} ${py_files} ${h_files}
+ COMMAND ${PYTHON_EXECUTABLE} -B
+ ${CMAKE_SOURCE_DIR}/gen/volk_register.py
+ ${CMAKE_BINARY_DIR}
+)
+
+########################################################################
+# Handle orc support
+########################################################################
+FIND_PACKAGE(PkgConfig)
+IF(PKG_CONFIG_FOUND)
+PKG_CHECK_MODULES(ORC "orc-0.4")
+ENDIF(PKG_CONFIG_FOUND)
+
+FIND_PROGRAM(ORCC_EXECUTABLE orcc)
+
+IF(ORC_FOUND AND ORCC_EXECUTABLE)
+ #setup orc library usage
+ INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS})
+ LINK_DIRECTORIES(${ORC_LIBRARY_DIRS})
+ ADD_DEFINITIONS(-DLV_HAVE_ORC)
+
+ #setup orc functions
+ FILE(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
+ FOREACH(orc_file ${orc_files})
+
+ #extract the name for the generated c source from the orc file
+ GET_FILENAME_COMPONENT(orc_file_name_we ${orc_file} NAME_WE)
+ SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)
+
+ #create a rule to generate the source and add to the list of sources
+ ADD_CUSTOM_COMMAND(
+ COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file}
+ DEPENDS ${orc_file} OUTPUT ${orcc_gen}
+ )
+ LIST(APPEND volk_sources ${orcc_gen})
+
+ ENDFOREACH(orc_file)
+ELSE()
+ MESSAGE(STATUS "Did not find liborc and orcc, disabling orc support...")
+ENDIF()
+
+########################################################################
+# Setup the volk sources list and library
+########################################################################
+IF(NOT WIN32)
+ ADD_DEFINITIONS(-fvisibility=hidden)
+ENDIF()
+
+INCLUDE_DIRECTORIES(
+ ${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_BINARY_DIR}/include
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+LIST(APPEND volk_sources
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c
+ ${volk_gen_sources}
+)
+
+#set the machine definitions where applicable
+SET_SOURCE_FILES_PROPERTIES(
+ ${CMAKE_CURRENT_BINARY_DIR}/volk.c
+ ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c
+PROPERTIES COMPILE_DEFINITIONS "${machine_defs}")
+
+IF(MSVC)
+ #add compatibility includes for stdint types
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc)
+ #compile the sources as C++ due to the lack of complex.h under MSVC
+ SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX)
+ENDIF(MSVC)
+
+#create the volk runtime library
+ADD_LIBRARY(volk SHARED ${volk_sources})
+TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES})
+SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER})
+SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS")
+
+INSTALL(TARGETS volk
+ LIBRARY DESTINATION lib${LIB_SUFFIX} # .so file
+ ARCHIVE DESTINATION lib${LIB_SUFFIX} # .lib file
+ RUNTIME DESTINATION bin # .dll file
+)
+
+########################################################################
+# Build the QA test application
+########################################################################
+FIND_PACKAGE(Boost COMPONENTS unit_test_framework)
+
+IF(Boost_FOUND)
+
+SET_SOURCE_FILES_PROPERTIES(
+ ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES
+ COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN"
+)
+
+INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS})
+LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
+
+ADD_EXECUTABLE(test_all
+ ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
+)
+TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES})
+ADD_TEST(qa_volk_test_all test_all)
+
+ENDIF()
diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
deleted file mode 100644
index 473acd2a6..000000000
--- a/volk/lib/Makefile.am
+++ /dev/null
@@ -1,158 +0,0 @@
-#
-# Copyright 2010,2011 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-
-include $(top_srcdir)/Makefile.common
-
-#FIXME: forcing the top_builddir for distcheck seems like a bit
-# of a hack. Figure out the right way to do this to find built
-# volk_config.h and volk_tables.h
-
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
- -I$(top_builddir)/include \
- $(LV_CXXFLAGS) $(WITH_INCLUDES)
-
-
-# We build 1 library and 1 executable here. The library contains
-# everything except the QA code. The C++ QA code is especially recommended
-# when you have general purpose C or C++ code that may not get
-# thoroughly exercised by building and running a GR block. The
-# executable runs the QA code at "make check" time.
-#
-#
-#
-# N.B., If there's a SWIG generated shared library and associated
-# python code, it will be contained in ../python, not here. (That
-# code is conditionally built depending on the state of the
-# --without-python configure option.) However, the .i should be here
-# next to the .h that it's based on.
-
-
-# list of programs run by "make check" and "make distcheck"
-#TESTS = testqa
-#orc stuff gets built in the ORC directory conditional to ORC being enabled.
-#it gets linked in during the build of libvolk as an added library.
-#there might be a better way to do this.
-
-lib_LTLIBRARIES = \
- libvolk.la \
- libvolk_runtime.la
-
-EXTRA_DIST = \
- volk_mktables.c \
- volk_rank_archs.h \
- volk_proccpu_sim.c \
- gcc_x86_cpuid.h
-
-# ----------------------------------------------------------------
-# The main library
-# ----------------------------------------------------------------
-
-libvolk_runtime_la_SOURCES = \
- $(platform_CODE) \
- volk_runtime.c \
- volk_init.c \
- volk_rank_archs.c
-
-libvolk_la_SOURCES = \
- $(platform_CODE) \
- volk.c \
- volk_environment_init.c
-
-volk_orc_LDFLAGS = \
- $(ORC_LDFLAGS) \
- -lorc-0.4
-
-volk_orc_LIBADD = \
- ../orc/libvolk_orc.la
-
-if LV_HAVE_ORC
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
-libvolk_la_LIBADD = $(volk_orc_LIBADD)
-else
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-libvolk_la_LIBADD =
-endif
-
-
-# ----------------------------------------------------------------
-# The QA library. Note libvolk.la in LIBADD
-# ----------------------------------------------------------------
-#libvolk_qa_la_SOURCES = \
-# qa_utils.cc
-
-#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost
-
-#libvolk_qa_la_LIBADD = \
-# libvolk.la \
-# libvolk_runtime.la
-
-# ----------------------------------------------------------------
-# headers that don't get installed
-# ----------------------------------------------------------------
-noinst_HEADERS = \
- volk_init.h \
- qa_utils.h
-
-# ----------------------------------------------------------------
-# Our test program
-# ----------------------------------------------------------------
-noinst_PROGRAMS = \
- testqa
-
-testqa_SOURCES = testqa.cc qa_utils.cc
-testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) \
- $(BOOST_CPPFLAGS)
-testqa_LDFLAGS = $(BOOST_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
-if LV_HAVE_ORC
-testqa_LDADD = \
- libvolk.la \
- libvolk_runtime.la \
- ../orc/libvolk_orc.la
-else
-testqa_LDADD = \
- libvolk.la \
- libvolk_runtime.la
-endif
-
-distclean-local:
- rm -f volk.c
- rm -f volk_cpu_generic.c
- rm -f volk_cpu_powerpc.c
- rm -f volk_cpu_x86.c
- rm -f volk_init.c
- rm -f volk_init.h
- rm -f volk_mktables.c
- rm -f volk_proccpu_sim.c
- rm -f volk_runtime.c
- rm -f volk_tables.h
- rm -f volk_environment_init.c
-#SUBDIRS =
-
-#ifdef BUILD_SSE
-#SUBDIRS += sse
-#elif BUILD_SPU
-#SUBDIRS += spu
-#else
-#SUBDIRS += port
-#endif
-
-
diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc
index 154aa0f17..5d5eb7e18 100644
--- a/volk/lib/qa_16s_add_quad_aligned16.cc
+++ b/volk/lib/qa_16s_add_quad_aligned16.cc
@@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() {
double total;
const int vlen = 3200;
const int ITERS = 100000;
- short input0[vlen] __attribute__ ((aligned (16)));
- short input1[vlen] __attribute__ ((aligned (16)));
- short input2[vlen] __attribute__ ((aligned (16)));
- short input3[vlen] __attribute__ ((aligned (16)));
- short input4[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) short input0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input1[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input2[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input3[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input4[vlen];
- short output0[vlen] __attribute__ ((aligned (16)));
- short output1[vlen] __attribute__ ((aligned (16)));
- short output2[vlen] __attribute__ ((aligned (16)));
- short output3[vlen] __attribute__ ((aligned (16)));
- short output01[vlen] __attribute__ ((aligned (16)));
- short output11[vlen] __attribute__ ((aligned (16)));
- short output21[vlen] __attribute__ ((aligned (16)));
- short output31[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) short output0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output1[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output2[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output3[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output01[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output11[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output21[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output31[vlen];
for(int i = 0; i < vlen; ++i) {
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
index 62deffaeb..2e6e6a1a0 100644
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
clock_t start, end;
double total;
- short target[vlen] __attribute__ ((aligned (16)));
- short target2[vlen] __attribute__ ((aligned (16)));
- short target3[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) short target[vlen];
+ __VOLK_ATTR_ALIGNED(16) short target2[vlen];
+ __VOLK_ATTR_ALIGNED(16) short target3[vlen];
- short src0[vlen] __attribute__ ((aligned (16)));
- short permute_indexes[vlen] __attribute__ ((aligned (16))) = {
+ __VOLK_ATTR_ALIGNED(16) short src0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
- short cntl0[vlen] __attribute__ ((aligned (16))) = {
+ __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
- short cntl1[vlen] __attribute__ ((aligned (16))) = {
+ __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
- short cntl2[vlen] __attribute__ ((aligned (16))) = {
+ __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
- short cntl3[vlen] __attribute__ ((aligned (16))) = {
+ __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
- short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+ __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
index 819b2256b..3cd4e906d 100644
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
@@ -23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
clock_t start, end;
double total;
- short target[vlen] __attribute__ ((aligned (16)));
- short target2[vlen] __attribute__ ((aligned (16)));
- short src0[vlen] __attribute__ ((aligned (16)));
- short permute_indexes[vlen] __attribute__ ((aligned (16)));
- short cntl0[vlen] __attribute__ ((aligned (16)));
- short cntl1[vlen] __attribute__ ((aligned (16)));
- short cntl2[vlen] __attribute__ ((aligned (16)));
- short cntl3[vlen] __attribute__ ((aligned (16)));
- short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+ __VOLK_ATTR_ALIGNED(16) short target[vlen];
+ __VOLK_ATTR_ALIGNED(16) short target2[vlen];
+ __VOLK_ATTR_ALIGNED(16) short src0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
+ __VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
+ __VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
+ __VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
+ __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i) {
src0[i] = i;
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc
index 66f8c9afa..192a69e35 100644
--- a/volk/lib/qa_16s_quad_max_star_aligned16.cc
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc
@@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() {
void qa_16s_quad_max_star_aligned16::t1() {
const int vlen = 34;
- short input0[vlen] __attribute__ ((aligned (16)));
- short input1[vlen] __attribute__ ((aligned (16)));
- short input2[vlen] __attribute__ ((aligned (16)));
- short input3[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) short input0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input1[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input2[vlen];
+ __VOLK_ATTR_ALIGNED(16) short input3[vlen];
- short output0[vlen] __attribute__ ((aligned (16)));
- short output1[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) short output0[vlen];
+ __VOLK_ATTR_ALIGNED(16) short output1[vlen];
for(int i = 0; i < vlen; ++i) {
short plus0 = (short) (rand() - (RAND_MAX/2));
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc
index 592304f83..a2e7a85be 100644
--- a/volk/lib/qa_32f_fm_detect_aligned16.cc
+++ b/volk/lib/qa_32f_fm_detect_aligned16.cc
@@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() {
double total;
const int vlen = 3201;
const int ITERS = 10000;
- float input0[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) float input0[vlen];
- float output0[vlen] __attribute__ ((aligned (16)));
- float output01[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) float output0[vlen];
+ __VOLK_ATTR_ALIGNED(16) float output01[vlen];
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
index a3d0955bd..981bb19e6 100644
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
@@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
double total;
const int vlen = 3201;
const int ITERS = 10000;
- std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
- float output_generic[vlen] __attribute__ ((aligned (16)));
- float output_sse3[vlen] __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
+ __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
const float scalar = vlen;
const float rbw = 1.7;
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc
index 618a82a02..c880260f2 100644
--- a/volk/lib/qa_32u_popcnt_aligned16.cc
+++ b/volk/lib/qa_32u_popcnt_aligned16.cc
@@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() {
double total;
const int ITERS = 10000000;
- uint32_t input0 __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) uint32_t input0;
- uint32_t output0 __attribute__ ((aligned (16)));
- uint32_t output01 __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) uint32_t output0;
+ __VOLK_ATTR_ALIGNED(16) uint32_t output01;
input0 = ((uint32_t) (rand() - (RAND_MAX/2)));
output0 = 0;
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc
index 85ef58795..6be4e50ea 100644
--- a/volk/lib/qa_64u_popcnt_aligned16.cc
+++ b/volk/lib/qa_64u_popcnt_aligned16.cc
@@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() {
double total;
const int ITERS = 10000000;
- uint64_t input0 __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) uint64_t input0;
- uint64_t output0 __attribute__ ((aligned (16)));
- uint64_t output01 __attribute__ ((aligned (16)));
+ __VOLK_ATTR_ALIGNED(16) uint64_t output0;
+ __VOLK_ATTR_ALIGNED(16) uint64_t output01;
input0 = ((uint64_t) (rand() - (RAND_MAX/2)));
output0 = 0;
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index b0f63d2b5..db606a472 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -3,16 +3,16 @@
#include <boost/foreach.hpp>
#include <boost/assign/list_of.hpp>
#include <boost/tokenizer.hpp>
-//#include <boost/test/unit_test.hpp>
#include <iostream>
#include <vector>
#include <list>
#include <ctime>
#include <cmath>
+#include <limits>
#include <boost/lexical_cast.hpp>
-//#include <volk/volk_runtime.h>
-#include <volk/volk_registry.h>
#include <volk/volk.h>
+#include <volk/volk_cpu.h>
+#include <volk/volk_common.h>
#include <boost/typeof/typeof.hpp>
#include <boost/type_traits.hpp>
@@ -62,50 +62,14 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
}
}
-static std::vector<std::string> get_arch_list(const int archs[]) {
+static std::vector<std::string> get_arch_list(struct volk_func_desc desc) {
std::vector<std::string> archlist;
- int num_archs = archs[0];
-
- //there has got to be a way to query these arches
- for(int i = 0; i < num_archs; i++) {
- switch(archs[i+1]) {
- case (1<<LV_GENERIC):
- archlist.push_back("generic");
- break;
- case (1<<LV_ORC):
- archlist.push_back("orc");
- break;
- case (1<<LV_SSE):
- archlist.push_back("sse");
- break;
- case (1<<LV_SSE2):
- archlist.push_back("sse2");
- break;
- case (1<<LV_SSE3):
- archlist.push_back("sse3");
- break;
- case (1<<LV_SSSE3):
- archlist.push_back("ssse3");
- break;
- case (1<<LV_SSE4_1):
- archlist.push_back("sse4_1");
- break;
- case (1<<LV_SSE4_2):
- archlist.push_back("sse4_2");
- break;
- case (1<<LV_SSE4_A):
- archlist.push_back("sse4_a");
- break;
- case (1<<LV_MMX):
- archlist.push_back("mmx");
- break;
- case (1<<LV_AVX):
- archlist.push_back("avx");
- break;
- default:
- break;
- }
+
+ for(int i = 0; i < desc.n_archs; i++) {
+ //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc
+ archlist.push_back(std::string(desc.indices[i]));
}
+
return archlist;
}
@@ -256,7 +220,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
bool fail = false;
int print_max_errs = 10;
for(int i=0; i<vlen; i++) {
- if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) {
+ if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) {
fail=true;
if(print_max_errs-- > 0) {
std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl;
@@ -269,7 +233,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
class volk_qa_aligned_mem_pool{
public:
- void *get_new(size_t size, size_t alignment = 16){
+ void *get_new(size_t size, size_t alignment = 32){
_mems.push_back(std::vector<char>(size + alignment-1, 0));
size_t ptr = size_t(&_mems.back().front());
return (void *)((ptr + alignment-1) & ~(alignment-1));
@@ -277,11 +241,19 @@ public:
private: std::list<std::vector<char> > _mems;
};
-bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
+bool run_volk_tests(struct volk_func_desc desc,
+ void (*manual_func)(),
+ std::string name,
+ float tol,
+ float scalar,
+ int vlen,
+ int iter,
+ std::vector<std::string> *best_arch_vector = 0
+ ) {
std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
//first let's get a list of available architectures for the test
- std::vector<std::string> arch_list = get_arch_list(archs);
+ std::vector<std::string> arch_list = get_arch_list(desc);
if(arch_list.size() < 2) {
std::cout << "no architectures to test" << std::endl;
@@ -334,6 +306,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
//now run the test
clock_t start, end;
+ std::vector<double> profile_times;
for(int i = 0; i < arch_list.size(); i++) {
start = clock();
@@ -368,8 +341,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
}
end = clock();
- std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
+ double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl;
+
+ profile_times.push_back(arch_time);
}
+
//and now compare each output to the generic output
//first we have to know which output is the generic one, they aren't in order...
int generic_offset=0;
@@ -381,7 +358,9 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
bool fail = false;
bool fail_global = false;
+ std::vector<bool> arch_results;
for(int i=0; i<arch_list.size(); i++) {
+ fail = false;
if(i != generic_offset) {
for(int j=0; j<both_sigs.size(); j++) {
if(both_sigs[j].is_float) {
@@ -432,6 +411,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
//fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
}
}
+ arch_results.push_back(!fail);
+ }
+
+ double best_time = std::numeric_limits<double>::max();
+ std::string best_arch = "generic";
+ for(int i=0; i < arch_list.size(); i++) {
+ if((profile_times[i] < best_time) && arch_results[i]) {
+ best_time = profile_times[i];
+ best_arch = arch_list[i];
+ }
+ }
+
+ std::cout << "Best arch: " << best_arch << std::endl;
+ if(best_arch_vector) {
+ best_arch_vector->push_back(name + std::string(" ") + best_arch);
}
return fail_global;
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 1b64bacaa..a1bc1f20c 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -3,6 +3,9 @@
#include <cstdlib>
#include <string>
+#include <vector>
+#include <volk/volk.h>
+#include <volk/volk_common.h>
struct volk_type_t {
bool is_float;
@@ -18,10 +21,10 @@ volk_type_t volk_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
-bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int);
-
-#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0)
+bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *);
+#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); }
+#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results)
typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 779bc61eb..349fb0630 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -1,100 +1,93 @@
#include "qa_utils.h"
#include <volk/volk.h>
-#include <volk/volk_registry.h>
#include <boost/test/unit_test.hpp>
-BOOST_AUTO_TEST_CASE(volk_test_all) {
- //in order...
-// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
-// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000);
-// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000);
-// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
- VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000);
-// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000);
-// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000);
-// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000);
-// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000);
- VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
- VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
+//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
+//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100);
+VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000);
+//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000);
+//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50);
+VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100);
+//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100);
+VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000);
+//NOTE(review): 2<<31 does not fit in a 32-bit int, so the scalar handed to
+//run_volk_tests here is not 2^32 -- confirm the intended scale and spell it
+//as a floating-point literal.
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000);
+//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100);
+VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000);
+//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000);
+VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100);
+VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100);
+VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000);
+VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000);
+VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000);
+VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 204600, 2000);
+//VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 204600, 1000);
+//VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000);
+VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400);
+VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400);
+VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 204600, 20000);
+VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000);
-}
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
new file mode 100644
index 000000000..ebfe3bc40
--- /dev/null
+++ b/volk/lib/volk_prefs.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <volk/volk_prefs.h>
+
+//#if defined(_WIN32)
+//#include <Windows.h>
+//#endif
+
+/*
+ * Builds the location of the per-user preference file,
+ * "$HOME/.volk/volk_config", in the caller's buffer.
+ *
+ * path  destination buffer. No length is passed in, so the caller must make
+ *       it large enough for $HOME plus the suffix and the terminator.
+ *
+ * When HOME is not set the result is the empty string, which makes a
+ * following fopen() fail cleanly instead of copying from a null pointer.
+ */
+void get_config_path(char *path) {
+  const char *suffix = "/.volk/volk_config";
+  const char *home = getenv("HOME");
+
+  if(home == NULL) {
+    path[0] = '\0';
+    return;
+  }
+
+  strcpy(path, home);
+  strcat(path, suffix);
+}
+
+/*
+ * Reads the user's per-function architecture preferences from the file
+ * named by get_config_path().
+ *
+ * A line is used when it holds two whitespace-separated words and the first
+ * one starts with "volk_"; every other line is ignored.
+ *
+ * prefs    out-parameter. On return *prefs is either NULL (nothing loaded)
+ *          or a malloc'd array that the caller owns.
+ * returns  the number of entries stored in *prefs; 0 when the file is
+ *          missing, holds no usable lines, or memory could not be obtained.
+ */
+int load_preferences(struct volk_arch_pref **prefs) {
+  FILE *config_file;
+  char path[512], line[512], function[128], arch[32];
+  int n_arch_prefs = 0;
+  int n_loaded = 0;
+
+  *prefs = NULL;
+
+  //get the config path
+  get_config_path(path);
+  config_file = fopen(path, "r");
+  if(!config_file) return 0; //no prefs found
+
+  //first pass: count the usable lines so the array can be sized.
+  //the field widths keep sscanf inside function[128] and arch[32].
+  while(fgets(line, sizeof(line), config_file) != NULL) {
+    if(sscanf(line, "%127s %31s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+      n_arch_prefs++;
+    }
+  }
+
+  if(n_arch_prefs == 0) {
+    fclose(config_file);
+    return 0;
+  }
+
+  //now allocate the memory required for volk_arch_prefs
+  *prefs = malloc(n_arch_prefs * sizeof(**prefs));
+  if(*prefs == NULL) {
+    fclose(config_file);
+    return 0;
+  }
+
+  //second pass: reset the file pointer and write the prefs into the array.
+  //the n_loaded bound protects the array if the file grew since the count.
+  rewind(config_file);
+  while(n_loaded < n_arch_prefs && fgets(line, sizeof(line), config_file) != NULL) {
+    if(sscanf(line, "%127s %31s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+      struct volk_arch_pref *t_pref = &(*prefs)[n_loaded];
+      //both sources are NUL-terminated within their buffers, so the copies are too
+      strncpy(t_pref->name, function, sizeof(function));
+      strncpy(t_pref->arch, arch, sizeof(arch));
+      n_loaded++;
+    }
+  }
+  fclose(config_file);
+  return n_loaded;
+}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index b1a93db26..e10433fd0 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -1,10 +1,40 @@
-#include<volk_rank_archs.h>
-#include<stdio.h>
+#include <volk_rank_archs.h>
+#include <volk/volk_prefs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
-unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) {
- int i = 2;
+/*
+ * Looks up arch_name in the table of architecture names.
+ *
+ * indices    array of n_archs architecture-name strings
+ * n_archs    number of entries in indices
+ * arch_name  name to search for (compared over at most 20 characters)
+ * returns    the position of arch_name; if it is absent, a warning is
+ *            printed and the position of "generic" is returned instead;
+ *            0 if the table has no "generic" entry either.
+ */
+unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) {
+  unsigned int i;
+
+  for(i=0; i<n_archs; i++) {
+    if(!strncmp(indices[i], arch_name, 20)) {
+      return i;
+    }
+  }
+
+  printf("Volk warning: no arch found, returning generic impl\n");
+
+  //search for the fallback with a plain loop: calling get_index again would
+  //never terminate when the table has no "generic" entry
+  for(i=0; i<n_archs; i++) {
+    if(!strncmp(indices[i], "generic", 20)) {
+      return i;
+    }
+  }
+  return 0;
+}
+
+unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) {
+ int i;
unsigned int best_val = 0;
- for(; i < arch_defs[0] + 1; ++i) {
+ static struct volk_arch_pref *volk_arch_prefs;
+ static int n_arch_prefs = 0;
+ static int prefs_loaded = 0;
+ if(!prefs_loaded) {
+ n_arch_prefs = load_preferences(&volk_arch_prefs);
+ prefs_loaded = 1;
+ }
+
+ //now look for the function name in the prefs list
+ for(i=0; i < n_arch_prefs; i++) {
+ if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it
+ return get_index(indices, n_archs, volk_arch_prefs[i].arch);
+ }
+ }
+
+ for(i=1; i < n_archs; ++i) {
if((arch_defs[i]&(!arch)) == 0) {
best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val;
}
diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h
index 26b9f7503..546240d2c 100644
--- a/volk/lib/volk_rank_archs.h
+++ b/volk/lib/volk_rank_archs.h
@@ -5,8 +5,8 @@
extern "C" {
#endif
-unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch);
-
+unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name);
+unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch);
#ifdef __cplusplus
}