diff options
author | Tom Rondeau | 2011-08-27 16:44:32 -0400 |
---|---|---|
committer | Tom Rondeau | 2011-08-27 16:44:32 -0400 |
commit | 54881f8803d4f796dde2af031e6f1a37df9445f1 (patch) | |
tree | b1e4e6c34004f22a29c03815ed3ae49065693dce /volk/lib | |
parent | 50cde24aea52d66d69313a490f7eab78a5085849 (diff) | |
parent | f4cc7884c608a7ec1969e68b73e12cdbcc26145c (diff) | |
download | gnuradio-54881f8803d4f796dde2af031e6f1a37df9445f1.tar.gz gnuradio-54881f8803d4f796dde2af031e6f1a37df9445f1.tar.bz2 gnuradio-54881f8803d4f796dde2af031e6f1a37df9445f1.zip |
Merge branch 'master' of gnuradio.org:gnuradio
Diffstat (limited to 'volk/lib')
-rw-r--r-- | volk/lib/.gitignore | 21 | ||||
-rw-r--r-- | volk/lib/CMakeLists.txt | 261 | ||||
-rw-r--r-- | volk/lib/Makefile.am | 158 | ||||
-rw-r--r-- | volk/lib/qa_16s_add_quad_aligned16.cc | 26 | ||||
-rw-r--r-- | volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 20 | ||||
-rw-r--r-- | volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc | 18 | ||||
-rw-r--r-- | volk/lib/qa_16s_quad_max_star_aligned16.cc | 12 | ||||
-rw-r--r-- | volk/lib/qa_32f_fm_detect_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_32u_popcnt_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_64u_popcnt_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 95 | ||||
-rw-r--r-- | volk/lib/qa_utils.h | 9 | ||||
-rw-r--r-- | volk/lib/testqa.cc | 183 | ||||
-rw-r--r-- | volk/lib/volk_prefs.c | 49 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 40 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.h | 4 |
17 files changed, 537 insertions, 383 deletions
diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index 6a5fde28f..28ec6ddaa 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -1,23 +1,4 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in -/volk.c -/volk_cpu_generic.c -/volk_cpu_powerpc.c -/volk_cpu_x86.c -/volk_environment_init.c -/volk_init.c -/volk_init.h -/volk_mktables -/volk_mktables.c -/volk_proccpu_sim.c -/volk_runtime.c -/test_all +/Makefile.am /testqa diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt new file mode 100644 index 000000000..33a478265 --- /dev/null +++ b/volk/lib/CMakeLists.txt @@ -0,0 +1,261 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Parse the arches xml file: +# Test each arch to see if the compiler supports the flag. +# If the test passes append the arch to the available list. +######################################################################## +#extract the arch lines from the xml file using crazy python +EXECUTE_PROCESS( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch')))" + OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +#This macro sets the ${arch}_flag variable, +#and handles special cases for MSVC arch flags. +MACRO(set_arch_flag name flag) + IF(MSVC AND ${name} STREQUAL "mmx") + SET(${name}_flag "/arch:SSE") #no /arch:MMX + ELSEIF(MSVC AND ${name} STREQUAL "sse") + SET(${name}_flag "/arch:SSE") + ELSEIF(MSVC AND ${name} STREQUAL "sse2") + SET(${name}_flag "/arch:SSE2") + ELSE() + SET(${name}_flag -${flag}) + ENDIF() +ENDMACRO(set_arch_flag) + +MACRO(handle_arch name flag) + + #handle special case for none flag + IF(${flag} STREQUAL "none") + SET(have_${name} TRUE) + + #otherwise test the flag against the compiler + ELSE() + INCLUDE(CheckCXXCompilerFlag) + set_arch_flag(${name} ${flag}) + CHECK_CXX_COMPILER_FLAG(${${name}_flag} have_${name}) + ENDIF() + + IF(have_${name}) + LIST(APPEND available_arches ${name}) + ENDIF() +ENDMACRO(handle_arch) + +#create a list of available arches +FOREACH(arch_line ${arch_lines}) + SEPARATE_ARGUMENTS(args UNIX_COMMAND "${arch_line}") + handle_arch(${args}) +ENDFOREACH(arch_line) + +MESSAGE(STATUS "Available arches: ${available_arches}") + +######################################################################## +# Parse the machines xml file: +# Test each machine to see if its arch dependencies are supported. +# Build a list of supported machines and the machine definitions. +######################################################################## +#extract the machine lines from the xml file using crazy python +EXECUTE_PROCESS( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine')))" + OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +MACRO(handle_machine1 name) + UNSET(machine_flags) + STRING(TOUPPER LV_MACHINE_${name} machine_def) + + #check if all the arches are supported + FOREACH(arch ${ARGN}) + SET(is_match ${have_${arch}}) + IF(NOT is_match) + SET(is_match FALSE) + BREAK() + ENDIF(NOT is_match) + SET(machine_flags "${machine_flags} ${${arch}_flag}") + ENDFOREACH(arch) + + IF(is_match) + #this is a match, append the source and set its flags + SET(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c) + SET_SOURCE_FILES_PROPERTIES(${machine_source} PROPERTIES COMPILE_FLAGS ${machine_flags}) + LIST(APPEND machine_sources ${machine_source}) + LIST(APPEND machine_defs ${machine_def}) + LIST(APPEND available_machines ${name}) + ENDIF() +ENDMACRO(handle_machine1) + +MACRO(handle_machine name) + SET(arches ${ARGN}) + LIST(FIND arches "32|64" index) + IF(${index} EQUAL -1) + handle_machine1(${name} ${arches}) + ELSE() + LIST(REMOVE_ITEM arches "32|64") + handle_machine1(${name}_32 32 ${arches}) + handle_machine1(${name}_64 64 ${arches}) + ENDIF() +ENDMACRO(handle_machine) + +#setup the available machines +FOREACH(machine_line ${machine_lines}) + SEPARATE_ARGUMENTS(args UNIX_COMMAND "${machine_line}") + handle_machine(${args}) +ENDFOREACH(machine_line) + +MESSAGE(STATUS "Available machines: ${available_machines}") + +######################################################################## +# Create rules to run the volk generator +######################################################################## +#list of the generated sources +SET(volk_gen_sources + ${CMAKE_BINARY_DIR}/include/volk/volk.h + ${CMAKE_BINARY_DIR}/lib/volk.c + ${CMAKE_BINARY_DIR}/lib/volk_init.h + ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h + ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h + ${CMAKE_BINARY_DIR}/lib/volk_cpu.c + ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h + ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c + ${CMAKE_BINARY_DIR}/lib/volk_environment_init.h + ${CMAKE_BINARY_DIR}/lib/volk_machines.h + ${CMAKE_BINARY_DIR}/lib/volk_machines.c + ${machine_sources} +) + +#dependencies are all python, xml, and header implementation files +FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) +FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) +FILE(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h) + +ADD_CUSTOM_COMMAND( + OUTPUT ${volk_gen_sources} + DEPENDS ${xml_files} ${py_files} ${h_files} + COMMAND ${PYTHON_EXECUTABLE} -B + ${CMAKE_SOURCE_DIR}/gen/volk_register.py + ${CMAKE_BINARY_DIR} +) + +######################################################################## +# Handle orc support +######################################################################## +FIND_PACKAGE(PkgConfig) +IF(PKG_CONFIG_FOUND) +PKG_CHECK_MODULES(ORC "orc-0.4") +ENDIF(PKG_CONFIG_FOUND) + +FIND_PROGRAM(ORCC_EXECUTABLE orcc) + +IF(ORC_FOUND AND ORCC_EXECUTABLE) + #setup orc library usage + INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS}) + LINK_DIRECTORIES(${ORC_LIBRARY_DIRS}) + ADD_DEFINITIONS(-DLV_HAVE_ORC) + + #setup orc functions + FILE(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc) + FOREACH(orc_file ${orc_files}) + + #extract the name for the generated c source from the orc file + GET_FILENAME_COMPONENT(orc_file_name_we ${orc_file} NAME_WE) + SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c) + + #create a rule to generate the source and add to the list of sources + ADD_CUSTOM_COMMAND( + COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file} + DEPENDS ${orc_file} OUTPUT ${orcc_gen} + ) + LIST(APPEND volk_sources ${orcc_gen}) + + ENDFOREACH(orc_file) +ELSE() + MESSAGE(STATUS "Did not find liborc and orcc, disabling orc support...") +ENDIF() + +######################################################################## +# Setup the volk sources list and library +######################################################################## +IF(NOT WIN32) + ADD_DEFINITIONS(-fvisibility=hidden) +ENDIF() + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +LIST(APPEND volk_sources + ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c + ${volk_gen_sources} +) + +#set the machine definitions where applicable +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_CURRENT_BINARY_DIR}/volk.c + ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c +PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") + +IF(MSVC) + #add compatibility includes for stdint types + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) + #compile the sources as C++ due to the lack of complex.h under MSVC + SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX) +ENDIF(MSVC) + +#create the volk runtime library +ADD_LIBRARY(volk SHARED ${volk_sources}) +TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES}) +SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER}) +SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") + +INSTALL(TARGETS volk + LIBRARY DESTINATION lib${LIB_SUFFIX} # .so file + ARCHIVE DESTINATION lib${LIB_SUFFIX} # .lib file + RUNTIME DESTINATION bin # .dll file +) + +######################################################################## +# Build the QA test application +######################################################################## +FIND_PACKAGE(Boost COMPONENTS unit_test_framework) + +IF(Boost_FOUND) + +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES + COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN" +) + +INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) +LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) + +ADD_EXECUTABLE(test_all + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) +ADD_TEST(qa_volk_test_all test_all) + +ENDIF() diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am deleted file mode 100644 index 473acd2a6..000000000 --- a/volk/lib/Makefile.am +++ /dev/null @@ -1,158 +0,0 @@ -# -# Copyright 2010,2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# - -include $(top_srcdir)/Makefile.common - -#FIXME: forcing the top_builddir for distcheck seems like a bit -# of a hack. Figure out the right way to do this to find built -# volk_config.h and volk_tables.h - -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ - -I$(top_builddir)/include \ - $(LV_CXXFLAGS) $(WITH_INCLUDES) - - -# We build 1 library and 1 executable here. The library contains -# everything except the QA code. The C++ QA code is especially recommended -# when you have general purpose C or C++ code that may not get -# thoroughly exercised by building and running a GR block. The -# executable runs the QA code at "make check" time. -# -# -# -# N.B., If there's a SWIG generated shared library and associated -# python code, it will be contained in ../python, not here. (That -# code is conditionally built depending on the state of the -# --without-python configure option.) However, the .i should be here -# next to the .h that it's based on. - - -# list of programs run by "make check" and "make distcheck" -#TESTS = testqa -#orc stuff gets built in the ORC directory conditional to ORC being enabled. -#it gets linked in during the build of libvolk as an added library. -#there might be a better way to do this. - -lib_LTLIBRARIES = \ - libvolk.la \ - libvolk_runtime.la - -EXTRA_DIST = \ - volk_mktables.c \ - volk_rank_archs.h \ - volk_proccpu_sim.c \ - gcc_x86_cpuid.h - -# ---------------------------------------------------------------- -# The main library -# ---------------------------------------------------------------- - -libvolk_runtime_la_SOURCES = \ - $(platform_CODE) \ - volk_runtime.c \ - volk_init.c \ - volk_rank_archs.c - -libvolk_la_SOURCES = \ - $(platform_CODE) \ - volk.c \ - volk_environment_init.c - -volk_orc_LDFLAGS = \ - $(ORC_LDFLAGS) \ - -lorc-0.4 - -volk_orc_LIBADD = \ - ../orc/libvolk_orc.la - -if LV_HAVE_ORC -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_la_LIBADD = $(volk_orc_LIBADD) -else -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_la_LIBADD = -endif - - -# ---------------------------------------------------------------- -# The QA library. Note libvolk.la in LIBADD -# ---------------------------------------------------------------- -#libvolk_qa_la_SOURCES = \ -# qa_utils.cc - -#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost - -#libvolk_qa_la_LIBADD = \ -# libvolk.la \ -# libvolk_runtime.la - -# ---------------------------------------------------------------- -# headers that don't get installed -# ---------------------------------------------------------------- -noinst_HEADERS = \ - volk_init.h \ - qa_utils.h - -# ---------------------------------------------------------------- -# Our test program -# ---------------------------------------------------------------- -noinst_PROGRAMS = \ - testqa - -testqa_SOURCES = testqa.cc qa_utils.cc -testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) \ - $(BOOST_CPPFLAGS) -testqa_LDFLAGS = $(BOOST_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) -if LV_HAVE_ORC -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la \ - ../orc/libvolk_orc.la -else -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la -endif - -distclean-local: - rm -f volk.c - rm -f volk_cpu_generic.c - rm -f volk_cpu_powerpc.c - rm -f volk_cpu_x86.c - rm -f volk_init.c - rm -f volk_init.h - rm -f volk_mktables.c - rm -f volk_proccpu_sim.c - rm -f volk_runtime.c - rm -f volk_tables.h - rm -f volk_environment_init.c -#SUBDIRS = - -#ifdef BUILD_SSE -#SUBDIRS += sse -#elif BUILD_SPU -#SUBDIRS += spu -#else -#SUBDIRS += port -#endif - - diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index 154aa0f17..5d5eb7e18 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() { double total; const int vlen = 3200; const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); - short input4[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + __VOLK_ATTR_ALIGNED(16) short input4[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); - short output2[vlen] __attribute__ ((aligned (16))); - short output3[vlen] __attribute__ ((aligned (16))); - short output01[vlen] __attribute__ ((aligned (16))); - short output11[vlen] __attribute__ ((aligned (16))); - short output21[vlen] __attribute__ ((aligned (16))); - short output31[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + __VOLK_ATTR_ALIGNED(16) short output2[vlen]; + __VOLK_ATTR_ALIGNED(16) short output3[vlen]; + __VOLK_ATTR_ALIGNED(16) short output01[vlen]; + __VOLK_ATTR_ALIGNED(16) short output11[vlen]; + __VOLK_ATTR_ALIGNED(16) short output21[vlen]; + __VOLK_ATTR_ALIGNED(16) short output31[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index 62deffaeb..2e6e6a1a0 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short target3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short target3[vlen]; - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; - short cntl0[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl1[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl2[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; - short cntl3[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 819b2256b..3cd4e906d 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))); - short cntl0[vlen] __attribute__ ((aligned (16))); - short cntl1[vlen] __attribute__ ((aligned (16))); - short cntl2[vlen] __attribute__ ((aligned (16))); - short cntl3[vlen] __attribute__ ((aligned (16))); - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; for(int i = 0; i < vlen; ++i) { src0[i] = i; diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 66f8c9afa..192a69e35 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1() { const int vlen = 34; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = (short) (rand() - (RAND_MAX/2)); diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index 592304f83..a2e7a85be 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float input0[vlen]; - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; + __VOLK_ATTR_ALIGNED(16) float output01[vlen]; for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index a3d0955bd..981bb19e6 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen]; - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; + __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; const float scalar = vlen; const float rbw = 1.7; diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index 618a82a02..c880260f2 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint32_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t input0; - uint32_t output0 __attribute__ ((aligned (16))); - uint32_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t output0; + __VOLK_ATTR_ALIGNED(16) uint32_t output01; input0 = ((uint32_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index 85ef58795..6be4e50ea 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint64_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t input0; - uint64_t output0 __attribute__ ((aligned (16))); - uint64_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t output0; + __VOLK_ATTR_ALIGNED(16) uint64_t output01; input0 = ((uint64_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index b0f63d2b5..7f86dd78b 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -3,16 +3,16 @@ #include <boost/foreach.hpp> #include <boost/assign/list_of.hpp> #include <boost/tokenizer.hpp> -//#include <boost/test/unit_test.hpp> #include <iostream> #include <vector> #include <list> #include <ctime> #include <cmath> +#include <limits> #include <boost/lexical_cast.hpp> -//#include <volk/volk_runtime.h> -#include <volk/volk_registry.h> #include <volk/volk.h> +#include <volk/volk_cpu.h> +#include <volk/volk_common.h> #include <boost/typeof/typeof.hpp> #include <boost/type_traits.hpp> @@ -62,50 +62,14 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } } -static std::vector<std::string> get_arch_list(const int archs[]) { +static std::vector<std::string> get_arch_list(struct volk_func_desc desc) { std::vector<std::string> archlist; - int num_archs = archs[0]; - - //there has got to be a way to query these arches - for(int i = 0; i < num_archs; i++) { - switch(archs[i+1]) { - case (1<<LV_GENERIC): - archlist.push_back("generic"); - break; - case (1<<LV_ORC): - archlist.push_back("orc"); - break; - case (1<<LV_SSE): - archlist.push_back("sse"); - break; - case (1<<LV_SSE2): - archlist.push_back("sse2"); - break; - case (1<<LV_SSE3): - archlist.push_back("sse3"); - break; - case (1<<LV_SSSE3): - archlist.push_back("ssse3"); - break; - case (1<<LV_SSE4_1): - archlist.push_back("sse4_1"); - break; - case (1<<LV_SSE4_2): - archlist.push_back("sse4_2"); - break; - case (1<<LV_SSE4_A): - archlist.push_back("sse4_a"); - break; - case (1<<LV_MMX): - archlist.push_back("mmx"); - break; - case (1<<LV_AVX): - archlist.push_back("avx"); - break; - default: - break; - } + + for(int i = 0; i < desc.n_archs; i++) { + //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc + archlist.push_back(std::string(desc.indices[i])); } + return archlist; } @@ -256,7 +220,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i<vlen; i++) { - if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) { + if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl; @@ -269,7 +233,8 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { class volk_qa_aligned_mem_pool{ public: - void *get_new(size_t size, size_t alignment = 16){ + void *get_new(size_t size){ + size_t alignment = volk_get_alignment(); _mems.push_back(std::vector<char>(size + alignment-1, 0)); size_t ptr = size_t(&_mems.back().front()); return (void *)((ptr + alignment-1) & ~(alignment-1)); @@ -277,11 +242,19 @@ public: private: std::list<std::vector<char> > _mems; }; -bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector<std::string> *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test - std::vector<std::string> arch_list = get_arch_list(archs); + std::vector<std::string> arch_list = get_arch_list(desc); if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; @@ -334,6 +307,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //now run the test clock_t start, end; + std::vector<double> profile_times; for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -368,8 +342,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; @@ -381,7 +359,9 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, bool fail = false; bool fail_global = false; + std::vector<bool> arch_results; for(int i=0; i<arch_list.size(); i++) { + fail = false; if(i != generic_offset) { for(int j=0; j<both_sigs.size(); j++) { if(both_sigs[j].is_float) { @@ -432,6 +412,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1)); } } + arch_results.push_back(!fail); + } + + double best_time = std::numeric_limits<double>::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 1b64bacaa..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,9 @@ #include <cstdlib> #include <string> +#include <vector> +#include <volk/volk.h> +#include <volk/volk_common.h> struct volk_type_t { bool is_float; @@ -18,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *); +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 779bc61eb..62e62c2f4 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -1,100 +1,93 @@ #include "qa_utils.h" #include <volk/volk.h> -#include <volk/volk_registry.h> #include <boost/test/unit_test.hpp> -BOOST_AUTO_TEST_CASE(volk_test_all) { - //in order... -// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); -// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 20460, 1000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 20460, 100); +VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 20460, 1000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 20460, 10000); +//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 1000); +//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 1000); +VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 50); +VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 100); +//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 100); +VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 20460, 10000); +//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 2046, 100); +VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 20460, 5000); +//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 0, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 3000); +VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 100); +VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 100); +VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 20460, 3000); +VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 20460, 3000); +VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 20460, 5000); +VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 20460, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 20460, 10000); +VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 20460, 2000); +//VOLK_RUN_TESTS(volk_32u_popcnt_a, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 20460, 1000); +VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 20460, 1000); +//VOLK_RUN_TESTS(volk_64u_popcnt_a, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 20460, 3000); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 20460, 3000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 20460, 3000); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 20460, 3000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 20460, 10000); +VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 20460, 400); +VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 20460, 400); +VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 20000); +VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 2000); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 2000); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 2000); -} diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c new file mode 100644 index 000000000..9743c51d9 --- /dev/null +++ b/volk/lib/volk_prefs.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <volk/volk_prefs.h> + +//#if defined(_WIN32) +//#include <Windows.h> +//#endif + +void get_config_path(char *path) { + const char *suffix = "/.volk/volk_config"; + strcpy(path, getenv("HOME")); + strcat(path, suffix); +} + +//passing by reference in C can (***********) +int load_preferences(struct volk_arch_pref **prefs) { + FILE *config_file; + char path[512], line[512], function[128], arch[32]; + int n_arch_prefs = 0; + struct volk_arch_pref *t_pref; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + n_arch_prefs++; + } + } + + //now allocate the memory required for volk_arch_prefs + (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref)); + t_pref = (*prefs); + + //reset the file pointer and write the prefs into volk_arch_prefs + rewind(config_file); + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + strncpy(t_pref->name, function, 128); + strncpy(t_pref->arch, arch, 32); + t_pref++; + } + } + fclose(config_file); + return n_arch_prefs; +} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index b1a93db26..e10433fd0 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,10 +1,40 @@ -#include<volk_rank_archs.h> -#include<stdio.h> +#include <volk_rank_archs.h> +#include <volk/volk_prefs.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) { - int i = 2; +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { + int i; + for(i=0; i<n_archs; i++) { + if(!strncmp(indices[i], arch_name, 20)) { + return i; + } + } + //something terrible should happen here + printf("Volk warning: no arch found, returning generic impl\n"); + return get_index(indices, n_archs, "generic"); //but we'll fake it for now +} + +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { + int i; unsigned int best_val = 0; - for(; i < arch_defs[0] + 1; ++i) { + static struct volk_arch_pref *volk_arch_prefs; + static int n_arch_prefs = 0; + static int prefs_loaded = 0; + if(!prefs_loaded) { + n_arch_prefs = load_preferences(&volk_arch_prefs); + prefs_loaded = 1; + } + + //now look for the function name in the prefs list + for(i=0; i < n_arch_prefs; i++) { + if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it + return get_index(indices, n_archs, volk_arch_prefs[i].arch); + } + } + + for(i=1; i < n_archs; ++i) { if((arch_defs[i]&(!arch)) == 0) { best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 26b9f7503..546240d2c 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,8 +5,8 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch); - +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } |