summaryrefslogtreecommitdiff
path: root/volk/lib
diff options
context:
space:
mode:
Diffstat (limited to 'volk/lib')
-rw-r--r--volk/lib/CMakeLists.txt364
-rw-r--r--volk/lib/gcc_x86_cpuid.h6
-rw-r--r--volk/lib/testqa.cc2
3 files changed, 172 insertions, 200 deletions
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
index b491f94bb..8288786c9 100644
--- a/volk/lib/CMakeLists.txt
+++ b/volk/lib/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright 2011 Free Software Foundation, Inc.
+# Copyright 2011-2012 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,6 +16,31 @@
#
########################################################################
+# header file detection
+########################################################################
+include(CheckIncludeFile)
+
+# Probe each optional system header and, when it is present, export a
+# matching HAVE_<NAME>_H preprocessor definition to the sources.
+foreach(hdr cpuid intrin fenv dlfcn)
+    string(TOUPPER "HAVE_${hdr}_H" have_var)
+    check_include_file(${hdr}.h ${have_var})
+    if(${have_var})
+        add_definitions(-D${have_var})
+    endif()
+endforeach()
+
+# dlfcn.h additionally requires linking against the platform's dl library.
+if(HAVE_DLFCN_H)
+    list(APPEND volk_libraries ${CMAKE_DL_LIBS})
+endif()
+
+########################################################################
# Setup the compiler name
########################################################################
set(COMPILER_NAME ${CMAKE_C_COMPILER_ID})
@@ -23,235 +48,183 @@ if(MSVC) #its not set otherwise
set(COMPILER_NAME MSVC)
endif()
+message(STATUS "Compiler name: ${COMPILER_NAME}")
+
if(NOT DEFINED COMPILER_NAME)
message(FATAL_ERROR "COMPILER_NAME undefined. Volk build may not support this compiler.")
endif()
########################################################################
-# Parse the arches xml file:
-# Test each arch to see if the compiler supports the flag.
-# If the test passes append the arch to the available list.
+# detect x86 flavor of CPU
########################################################################
-#extract the compiler lines from the xml file using abusive python
-
-
-
-execute_process(
- COMMAND ${PYTHON_EXECUTABLE} -c
- "from xml.dom import minidom; print ';'.join(map(lambda b: ','.join([','.join([b.attributes['name'].value,item.attributes['name'].value,item.firstChild.data]) for item in b.getElementsByTagName('remap')]), minidom.parse('${CMAKE_SOURCE_DIR}/gen/compilers.xml').getElementsByTagName('compiler')))"
-
- OUTPUT_VARIABLE compiler_lines OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-foreach(thing ${compiler_lines})
- string(REGEX REPLACE "," ";" thing_list ${thing})
- list(FIND thing_list ${COMPILER_NAME} check_val)
- if(NOT ("${check_val}" STREQUAL "-1"))
- string(REGEX REPLACE "${COMPILER_NAME}," ";" filter_string ${thing})
- endif()
-endforeach()
-
+# Sets CPU_IS_x86 when the target processor is an x86 variant.
+# The variable is referenced by name (not expanded) so that an empty
+# CMAKE_SYSTEM_PROCESSOR simply fails to match instead of producing a
+# malformed if() expression.
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86|x86_64|amd64)$")
+    message(STATUS "x86* CPU detected")
+    set(CPU_IS_x86 TRUE)
+endif()
-#extract compiler prefixes from the xml file using abusive python
+########################################################################
+# determine passing architectures based on compile flag tests
+########################################################################
execute_process(
- COMMAND ${PYTHON_EXECUTABLE} -c
- "from xml.dom import minidom; print ';'.join(map(lambda b: ','.join([','.join([b.attributes['name'].value,item.firstChild.data]) for item in b.getElementsByTagName('prefix')]), minidom.parse('${CMAKE_SOURCE_DIR}/gen/compilers.xml').getElementsByTagName('compiler')))"
-
- OUTPUT_VARIABLE compiler_prefixes OUTPUT_STRIP_TRAILING_WHITESPACE
+ COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
+ ${CMAKE_SOURCE_DIR}/gen/volk_compile_utils.py
+ --mode "arch_flags" --compiler "${COMPILER_NAME}"
+ OUTPUT_VARIABLE arch_flag_lines OUTPUT_STRIP_TRAILING_WHITESPACE
)
-foreach(thing ${compiler_prefixes})
- string(REGEX REPLACE "," ";" thing_list ${thing})
- list(FIND thing_list ${COMPILER_NAME} check_val)
- if(NOT ("${check_val}" STREQUAL "-1"))
- list(GET thing_list "1" prefix)
+macro(check_arch arch_name)
+ set(flags ${ARGN})
+ set(have_${arch_name} TRUE)
+ foreach(flag ${flags})
+ include(CheckCXXCompilerFlag)
+ set(have_flag have${flag})
+ execute_process( #make the have_flag have nice alphanum chars (just for looks/not necessary)
+ COMMAND ${PYTHON_EXECUTABLE} -c "import re; print(re.sub('\\W', '_', '${have_flag}'))"
+ OUTPUT_VARIABLE have_flag OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+ CHECK_CXX_COMPILER_FLAG(${flag} ${have_flag})
+ if (NOT ${have_flag})
+ set(have_${arch_name} FALSE)
+ endif()
+ endforeach(flag)
+ if (have_${arch_name})
+ list(APPEND available_archs ${arch_name})
endif()
-endforeach()
-
-
-
-
-#extract the arch lines from the xml file using abusive python
-execute_process(
- COMMAND ${PYTHON_EXECUTABLE} -c
- "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s %s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data,a.getElementsByTagName('overrule')[0].firstChild.data,a.getElementsByTagName('overrule_val')[0].firstChild.data) if (len(a.getElementsByTagName('overrule'))) else '%s %s %s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data,'no_overrule', 'no_overrule_val'), minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch')))"
-
- OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE
-)
+endmacro(check_arch)
+# Every entry is a comma separated record: the arch name followed by its
+# flags. Turn it into a CMake list and hand it to check_arch().
+foreach(line ${arch_flag_lines})
+    string(REPLACE "," ";" arch_flags ${line})
+    check_arch(${arch_flags})
+endforeach()
+# OVERRULE_ARCH(arch reason)
+#   Logs why an architecture is being dropped and removes it from
+#   available_archs.
+#     arch   - architecture name as stored in available_archs
+#     reason - text printed in front of the status message
+#   Kept as a macro so that available_archs is modified in the caller's scope.
+macro(OVERRULE_ARCH arch reason)
+    message(STATUS "${reason}, Overruled arch ${arch}")
+    # Older CMake releases treat list(REMOVE_ITEM) on a variable that was
+    # never created as a hard error, so skip the removal when no
+    # architecture has been appended yet.
+    if(DEFINED available_archs)
+        list(REMOVE_ITEM available_archs ${arch})
+    endif()
+endmacro(OVERRULE_ARCH)
-
-#set the various overrule values (see archs.xml)
-#a lot of this is translating between automake and cmake
-if(NOT "${CROSSCOMPILE_MULTILIB}" STREQUAL "true")
- set(MD_SUBCPU ${CMAKE_SYSTEM_PROCESSOR})
- #detect 32 or 64 bit compiler
- if(MD_SUBCPU MATCHES "^(i.86|x86|x86_64|amd64)$")
- include(CheckTypeSize)
- check_type_size("void*" SIZEOF_VOID_P BUILTIN_TYPES_ONLY)
- if (${SIZEOF_VOID_P} EQUAL 8)
- set(MD_SUBCPU x86_64)
- else()
- set(MD_SUBCPU x86)
- endif()
+########################################################################
+# eliminate AVX on GCC < 4.4
+# even though it accepts -mavx, the assembler (as) won't assemble xgetbv, which we need
+########################################################################
+if(CPU_IS_x86 AND COMPILER_NAME MATCHES "GNU")
+ execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
+ OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if(GCC_VERSION VERSION_LESS "4.4")
+ OVERRULE_ARCH(avx "GCC missing xgetbv")
endif()
endif()
-if(NOT "${ORC_FOUND}" STREQUAL "TRUE")
- set(LV_HAVE_ORC "no")
-endif()
-
+########################################################################
+# implement overruling in the ORC case,
+# since ORC always passes flag detection
+########################################################################
+if(NOT ORC_FOUND)
+ OVERRULE_ARCH(orc "ORC support not found")
+endif()
-
-
-
-macro(compiler_filter name flag)
- set(filtered_flag ${flag})
- foreach(thing ${filter_string})
- string(REGEX REPLACE "," ";" flagmap ${thing})
- list(GET flagmap "0" key)
- list(GET flagmap "1" val)
- string(REGEX MATCH "^${key}$" found ${flag})
- if("${found}" STREQUAL "${key}")
- string(REGEX REPLACE "^${key}$" "${val}" filtered_flag ${flag})
- endif()
- endforeach()
- set(${name}_flag "${prefix}${filtered_flag}")
-endmacro()
-
-
-
-
-
-
-
-macro(handle_arch name flag overrule overrule_val)
-
- #handle overrule
- if("${${overrule}}" STREQUAL "${overrule_val}")
- set(have_${name} FALSE)
- message(STATUS "${name} overruled")
- #handle special case for none flag
- elseif(${flag} STREQUAL "none")
- set(have_${name} TRUE)
- #otherwise test the flag(s) against the compiler
- else()
- include(CheckCXXCompilerFlag)
- string(REGEX REPLACE "," ";" flag_list ${flag})
- set(have_${name} 1)
- foreach(thing ${flag_list})
- compiler_filter(${name} ${thing})
- CHECK_CXX_COMPILER_FLAG(${${name}_flag} have_${thing})
- if(NOT (${have_${name}} AND ("${have_${thing}}" STREQUAL "1")))
- set(have_${name} 0)
- endif()
- endforeach()
+########################################################################
+# implement overruling in the non-multilib case
+# this makes things work when both -m32 and -m64 pass
+########################################################################
+if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
+ include(CheckTypeSize)
+ check_type_size("void*[8]" SIZEOF_CPU BUILTIN_TYPES_ONLY)
+ if (${SIZEOF_CPU} EQUAL 64)
+ OVERRULE_ARCH(32 "CPU width is 64 bits")
endif()
-
- if(have_${name})
- list(APPEND available_arches ${name})
+ if (${SIZEOF_CPU} EQUAL 32)
+ OVERRULE_ARCH(64 "CPU width is 32 bits")
endif()
+endif()
-endmacro(handle_arch)
-
-#create a list of available arches
-foreach(arch_line ${arch_lines})
- string(REPLACE " " ";" args "${arch_line}")
- handle_arch(${args})
-endforeach(arch_line)
-
-message(STATUS "Available arches: ${available_arches}")
+########################################################################
+# done overrules! print the result
+########################################################################
+message(STATUS "Available architectures: ${available_archs}")
########################################################################
-# Parse the machines xml file:
-# Test each machine to see if its arch dependencies are supported.
-# Build a list of supported machines and the machine definitions.
+# determine available machines given the available architectures
########################################################################
-#extract the machine lines from the xml file using crazy python
execute_process(
- COMMAND ${PYTHON_EXECUTABLE} -c
- "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine')))"
- OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE
+ COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
+ ${CMAKE_SOURCE_DIR}/gen/volk_compile_utils.py
+ --mode "machines" --archs "${available_archs}"
+ OUTPUT_VARIABLE available_machines OUTPUT_STRIP_TRAILING_WHITESPACE
)
-macro(handle_machine1 name)
- unset(machine_flags)
- string(TOUPPER LV_MACHINE_${name} machine_def)
-
- #check if all the arches are supported
- foreach(arch ${ARGN})
- set(is_match ${have_${arch}})
- if(NOT is_match)
- set(is_match FALSE)
- break()
- endif(NOT is_match)
- set(machine_flags "${machine_flags} ${${arch}_flag}")
- endforeach(arch)
-
- string(REGEX REPLACE "^[ \t]+" "" machine_flags "${machine_flags}")
-
- if(is_match)
- #this is a match, append the source and set its flags
- set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c)
- set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS "${machine_flags}")
- list(APPEND machine_sources ${machine_source})
- list(APPEND machine_defs ${machine_def})
- list(APPEND available_machines ${name})
- endif()
-endmacro(handle_machine1)
-
-macro(handle_machine name)
- set(arches ${ARGN})
- list(FIND arches "32|64" index)
- if(${index} EQUAL -1)
- handle_machine1(${name} ${arches})
- else()
- list(REMOVE_ITEM arches "32|64")
- handle_machine1(${name}_32 32 ${arches})
- handle_machine1(${name}_64 64 ${arches})
- endif()
-endmacro(handle_machine)
-
-#setup the available machines
-foreach(machine_line ${machine_lines})
- string(REPLACE " " ";" args "${machine_line}")
- handle_machine(${args})
-endforeach(machine_line)
+########################################################################
+# Implement machine overruling for redundant machines:
+# A machine is redundant when expansion rules occur,
+# and the arch superset passes configuration checks.
+# When this occurs, eliminate the redundant machines
+# to avoid unnecessary compilation of subset machines.
+########################################################################
+foreach(arch orc 64 32)
+    foreach(machine_name ${available_machines})
+        # Drop "_<arch>" from the name. If that changed anything, this machine
+        # is an expanded variant and the machine carrying the shortened name
+        # is removed from the list.
+        string(REPLACE "_${arch}" "" machine_name_no_arch ${machine_name})
+        if(NOT ${machine_name} STREQUAL ${machine_name_no_arch})
+            list(REMOVE_ITEM available_machines ${machine_name_no_arch})
+        endif()
+    endforeach()
+endforeach()
+########################################################################
+# done overrules! print the result
+########################################################################
message(STATUS "Available machines: ${available_machines}")
########################################################################
# Create rules to run the volk generator
########################################################################
-#list of the generated sources
-set(volk_gen_sources
- ${CMAKE_BINARY_DIR}/include/volk/volk.h
- ${CMAKE_BINARY_DIR}/lib/volk.c
- ${CMAKE_BINARY_DIR}/lib/volk_init.h
- ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h
- ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h
- ${CMAKE_BINARY_DIR}/lib/volk_cpu.c
- ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h
- ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c
- ${CMAKE_BINARY_DIR}/lib/volk_environment_init.h
- ${CMAKE_BINARY_DIR}/lib/volk_machines.h
- ${CMAKE_BINARY_DIR}/lib/volk_machines.c
- ${machine_sources}
-)
#dependencies are all python, xml, and header implementation files
file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
file(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h)
-add_custom_command(
- OUTPUT ${volk_gen_sources}
- DEPENDS ${xml_files} ${py_files} ${h_files}
- COMMAND ${PYTHON_EXECUTABLE} -B
- ${CMAKE_SOURCE_DIR}/gen/volk_register.py
- ${CMAKE_BINARY_DIR}
-)
+# gen_template(tmpl output [extra args...])
+#   Adds a build rule that runs gen/volk_tmpl_utils.py to produce `output`
+#   from the template file `tmpl`, and records `output` in volk_gen_sources.
+#     tmpl   - path to the template file passed as --input
+#     output - path of the generated file passed as --output
+#     ARGN   - any further arguments are forwarded to the script unchanged
+#   The rule re-runs when the template or any of the globbed xml, python or
+#   header files change. Kept as a macro so that volk_gen_sources is extended
+#   in the caller's scope.
+macro(gen_template tmpl output)
+    list(APPEND volk_gen_sources ${output})
+    add_custom_command(
+        OUTPUT ${output}
+        DEPENDS ${xml_files} ${py_files} ${h_files} ${tmpl}
+        COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
+            ${CMAKE_SOURCE_DIR}/gen/volk_tmpl_utils.py
+            --input ${tmpl} --output ${output} ${ARGN}
+        # VERBATIM makes argument escaping identical on every platform,
+        # which matters for paths containing spaces or shell characters.
+        VERBATIM
+    )
+endmacro(gen_template)
+
+# The generated public headers go into a directory that nothing else creates,
+# so make it at configure time. file(MAKE_DIRECTORY) replaces the deprecated
+# make_directory() command.
+file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/include/volk)
+
+# One build rule per template: public headers land in include/volk,
+# everything else in lib.
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk.tmpl.h ${CMAKE_BINARY_DIR}/include/volk/volk.h)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk.c)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_typedefs.tmpl.h ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_cpu.tmpl.h ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_cpu.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_cpu.c)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_config_fixed.tmpl.h ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_machines.tmpl.h ${CMAKE_BINARY_DIR}/lib/volk_machines.h)
+gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_machines.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_machines.c)
+
+foreach(machine_name ${available_machines})
+    # Register the build rule that generates this machine's C source.
+    set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${machine_name}.c)
+    gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_machine_xxx.tmpl.c ${machine_source} ${machine_name})
+
+    # Query the helper script for this machine's compile flags; the result is
+    # stored in <machine_name>_flags.
+    set(flags_var ${machine_name}_flags)
+    execute_process(
+        COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
+            ${CMAKE_SOURCE_DIR}/gen/volk_compile_utils.py
+            --mode "machine_flags" --machine "${machine_name}" --compiler "${COMPILER_NAME}"
+        OUTPUT_VARIABLE ${flags_var} OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    # Only attach flags when the script printed some.
+    if(${flags_var})
+        set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS "${${flags_var}}")
+    endif()
+
+    # Collect the upper-cased LV_MACHINE_<NAME> definition for this machine.
+    string(TOUPPER LV_MACHINE_${machine_name} machine_def)
+    list(APPEND machine_defs ${machine_def})
+endforeach()
########################################################################
# Set local include directories first
@@ -270,7 +243,7 @@ if(ORC_FOUND)
#setup orc library usage
include_directories(${ORC_INCLUDE_DIRS})
link_directories(${ORC_LIBRARY_DIRS})
- add_definitions(-DLV_HAVE_ORC)
+ list(APPEND volk_libraries ${ORC_LIBRARIES})
#setup orc functions
file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
@@ -313,16 +286,15 @@ PROPERTIES COMPILE_DEFINITIONS "${machine_defs}")
if(MSVC)
#add compatibility includes for stdint types
- include_directories(${CMAKE_SOURCE_DIR}/msvc)
+ include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
+ add_definitions(-DHAVE_CONFIG_H)
#compile the sources as C++ due to the lack of complex.h under MSVC
set_source_files_properties(${volk_sources} PROPERTIES LANGUAGE CXX)
endif()
#create the volk runtime library
add_library(volk SHARED ${volk_sources})
-if(ORC_FOUND)
- target_link_libraries(volk ${ORC_LIBRARIES})
-endif(ORC_FOUND)
+target_link_libraries(volk ${volk_libraries})
set_target_properties(volk PROPERTIES SOVERSION ${LIBVER})
set_target_properties(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS")
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
index e0254f192..3c3f47b00 100644
--- a/volk/lib/gcc_x86_cpuid.h
+++ b/volk/lib/gcc_x86_cpuid.h
@@ -5,16 +5,16 @@
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option) any
* later version.
- *
+ *
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
- *
+ *
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 593087f85..d0204fc01 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -55,7 +55,7 @@ VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 204600, 1);
VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 1);
//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000);
VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 3, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32767, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 1);