diff options
Diffstat (limited to 'volk/lib')
-rw-r--r-- | volk/lib/CMakeLists.txt | 264 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 32 | ||||
-rw-r--r-- | volk/lib/volk_prefs.c | 2 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 6 |
4 files changed, 168 insertions, 136 deletions
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index 33a478265..e18d13677 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -21,51 +21,72 @@ # If the test passes append the arch to the available list. ######################################################################## #extract the arch lines from the xml file using crazy python -EXECUTE_PROCESS( +execute_process( COMMAND ${PYTHON_EXECUTABLE} -c "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch')))" OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE ) +#get any mutually exclusive archs so we can exclude them +#this is really for compilers which can do both 32- and 64-bit compilations. +execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.parentNode.attributes['name'].value,a.firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('mutex')))" + OUTPUT_VARIABLE mutex_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + #This macro sets the ${arch}_flag variable, #and handles special cases for MSVC arch flags. -MACRO(set_arch_flag name flag) - IF(MSVC AND ${name} STREQUAL "mmx") - SET(${name}_flag "/arch:SSE") #no /arch:MMX - ELSEIF(MSVC AND ${name} STREQUAL "sse") - SET(${name}_flag "/arch:SSE") - ELSEIF(MSVC AND ${name} STREQUAL "sse2") - SET(${name}_flag "/arch:SSE2") - ELSE() - SET(${name}_flag -${flag}) - ENDIF() -ENDMACRO(set_arch_flag) - -MACRO(handle_arch name flag) +macro(set_arch_flag name flag) + if(MSVC AND ${name} STREQUAL "mmx") + set(${name}_flag "/arch:SSE") #no /arch:MMX + elseif(MSVC AND ${name} STREQUAL "sse") + set(${name}_flag "/arch:SSE") + elseif(MSVC AND ${name} STREQUAL "sse2") + set(${name}_flag "/arch:SSE2") + else() + set(${name}_flag -${flag}) + endif() +endmacro(set_arch_flag) + +macro(handle_arch name flag) #handle special case for none flag - IF(${flag} STREQUAL "none") - SET(have_${name} TRUE) + if(${flag} STREQUAL "none") + set(have_${name} TRUE) #otherwise test the flag against the compiler - ELSE() - INCLUDE(CheckCXXCompilerFlag) + else() + include(CheckCXXCompilerFlag) set_arch_flag(${name} ${flag}) CHECK_CXX_COMPILER_FLAG(${${name}_flag} have_${name}) - ENDIF() + endif() - IF(have_${name}) - LIST(APPEND available_arches ${name}) - ENDIF() -ENDMACRO(handle_arch) + if(have_${name}) + list(APPEND available_arches ${name}) + endif() +endmacro(handle_arch) + +macro(remove_mutex name mutex) + if(have_${name}) + unset(have_${mutex}) + endif() + list(REMOVE_ITEM available_arches ${mutex}) +endmacro(remove_mutex) #create a list of available arches -FOREACH(arch_line ${arch_lines}) - SEPARATE_ARGUMENTS(args UNIX_COMMAND "${arch_line}") +foreach(arch_line ${arch_lines}) + separate_arguments(args UNIX_COMMAND "${arch_line}") handle_arch(${args}) -ENDFOREACH(arch_line) +endforeach(arch_line) + +#strip out mutex archs +foreach(mutex_line ${mutex_lines}) + separate_arguments(args UNIX_COMMAND "${mutex_line}") + remove_mutex(${args}) +endforeach(mutex_line) -MESSAGE(STATUS "Available arches: ${available_arches}") +message(STATUS "Available arches: ${available_arches}") ######################################################################## # Parse the machines xml file: @@ -73,61 +94,61 @@ MESSAGE(STATUS "Available arches: ${available_arches}") # Build a list of supported machines and the machine definitions. ######################################################################## #extract the machine lines from the xml file using crazy python -EXECUTE_PROCESS( +execute_process( COMMAND ${PYTHON_EXECUTABLE} -c "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine')))" OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE ) -MACRO(handle_machine1 name) - UNSET(machine_flags) - STRING(TOUPPER LV_MACHINE_${name} machine_def) +macro(handle_machine1 name) + unset(machine_flags) + string(TOUPPER LV_MACHINE_${name} machine_def) #check if all the arches are supported - FOREACH(arch ${ARGN}) - SET(is_match ${have_${arch}}) - IF(NOT is_match) - SET(is_match FALSE) - BREAK() - ENDIF(NOT is_match) - SET(machine_flags "${machine_flags} ${${arch}_flag}") - ENDFOREACH(arch) - - IF(is_match) + foreach(arch ${ARGN}) + set(is_match ${have_${arch}}) + if(NOT is_match) + set(is_match FALSE) + break() + endif(NOT is_match) + set(machine_flags "${machine_flags} ${${arch}_flag}") + endforeach(arch) + + if(is_match) #this is a match, append the source and set its flags - SET(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c) - SET_SOURCE_FILES_PROPERTIES(${machine_source} PROPERTIES COMPILE_FLAGS ${machine_flags}) - LIST(APPEND machine_sources ${machine_source}) - LIST(APPEND machine_defs ${machine_def}) - LIST(APPEND available_machines ${name}) - ENDIF() -ENDMACRO(handle_machine1) - -MACRO(handle_machine name) - SET(arches ${ARGN}) - LIST(FIND arches "32|64" index) - IF(${index} EQUAL -1) + set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c) + set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS ${machine_flags}) + list(APPEND machine_sources ${machine_source}) + list(APPEND machine_defs ${machine_def}) + list(APPEND available_machines ${name}) + endif() +endmacro(handle_machine1) + +macro(handle_machine name) + set(arches ${ARGN}) + list(FIND arches "32|64" index) + if(${index} EQUAL -1) handle_machine1(${name} ${arches}) - ELSE() - LIST(REMOVE_ITEM arches "32|64") + else() + list(REMOVE_ITEM arches "32|64") handle_machine1(${name}_32 32 ${arches}) handle_machine1(${name}_64 64 ${arches}) - ENDIF() -ENDMACRO(handle_machine) + endif() +endmacro(handle_machine) #setup the available machines -FOREACH(machine_line ${machine_lines}) - SEPARATE_ARGUMENTS(args UNIX_COMMAND "${machine_line}") +foreach(machine_line ${machine_lines}) + separate_arguments(args UNIX_COMMAND "${machine_line}") handle_machine(${args}) -ENDFOREACH(machine_line) +endforeach(machine_line) -MESSAGE(STATUS "Available machines: ${available_machines}") +message(STATUS "Available machines: ${available_machines}") ######################################################################## # Create rules to run the volk generator ######################################################################## #list of the generated sources -SET(volk_gen_sources +set(volk_gen_sources ${CMAKE_BINARY_DIR}/include/volk/volk.h ${CMAKE_BINARY_DIR}/lib/volk.c ${CMAKE_BINARY_DIR}/lib/volk_init.h @@ -143,14 +164,24 @@ SET(volk_gen_sources ) #dependencies are all python, xml, and header implementation files -FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) -FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) -FILE(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h) +file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) +file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) +file(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h) + +#make sure we can use -B with python (introduced in 2.6) +execute_process( + COMMAND ${PYTHON_EXECUTABLE} -B -c "" + OUTPUT_QUIET ERROR_QUIET + RESULT_VARIABLE PYTHON_HAS_DASH_B_RESULT +) +if(PYTHON_HAS_DASH_B_RESULT EQUAL 0) + set(PYTHON_DASH_B "-B") +endif() -ADD_CUSTOM_COMMAND( +add_custom_command( OUTPUT ${volk_gen_sources} DEPENDS ${xml_files} ${py_files} ${h_files} - COMMAND ${PYTHON_EXECUTABLE} -B + COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} ${CMAKE_SOURCE_DIR}/gen/volk_register.py ${CMAKE_BINARY_DIR} ) @@ -158,104 +189,105 @@ ADD_CUSTOM_COMMAND( ######################################################################## # Handle orc support ######################################################################## -FIND_PACKAGE(PkgConfig) -IF(PKG_CONFIG_FOUND) -PKG_CHECK_MODULES(ORC "orc-0.4") -ENDIF(PKG_CONFIG_FOUND) +find_package(PkgConfig) +if(PKG_CONFIG_FOUND) +PKG_CHECK_MODULES(ORC "orc-0.4 > 0.4.11") +endif(PKG_CONFIG_FOUND) -FIND_PROGRAM(ORCC_EXECUTABLE orcc) +find_program(ORCC_EXECUTABLE orcc) -IF(ORC_FOUND AND ORCC_EXECUTABLE) +if(ORC_FOUND AND ORCC_EXECUTABLE) #setup orc library usage - INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS}) - LINK_DIRECTORIES(${ORC_LIBRARY_DIRS}) - ADD_DEFINITIONS(-DLV_HAVE_ORC) + include_directories(${ORC_INCLUDE_DIRS}) + link_directories(${ORC_LIBRARY_DIRS}) + add_definitions(-DLV_HAVE_ORC) #setup orc functions - FILE(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc) - FOREACH(orc_file ${orc_files}) + file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc) + foreach(orc_file ${orc_files}) #extract the name for the generated c source from the orc file - GET_FILENAME_COMPONENT(orc_file_name_we ${orc_file} NAME_WE) - SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c) + get_filename_component(orc_file_name_we ${orc_file} NAME_WE) + set(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c) #create a rule to generate the source and add to the list of sources - ADD_CUSTOM_COMMAND( + add_custom_command( COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file} DEPENDS ${orc_file} OUTPUT ${orcc_gen} ) - LIST(APPEND volk_sources ${orcc_gen}) + list(APPEND volk_sources ${orcc_gen}) - ENDFOREACH(orc_file) -ELSE() - MESSAGE(STATUS "Did not find liborc and orcc, disabling orc support...") -ENDIF() + endforeach(orc_file) +else() + message(STATUS "Did not find liborc and orcc, disabling orc support...") +endif() ######################################################################## # Setup the volk sources list and library ######################################################################## -IF(NOT WIN32) - ADD_DEFINITIONS(-fvisibility=hidden) -ENDIF() +if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) + #http://gcc.gnu.org/wiki/Visibility + add_definitions(-fvisibility=hidden) +endif() -INCLUDE_DIRECTORIES( - ${CMAKE_SOURCE_DIR}/include +include_directories( ${CMAKE_BINARY_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} ) -LIST(APPEND volk_sources +list(APPEND volk_sources ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c ${volk_gen_sources} ) #set the machine definitions where applicable -SET_SOURCE_FILES_PROPERTIES( +set_source_files_properties( ${CMAKE_CURRENT_BINARY_DIR}/volk.c ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") -IF(MSVC) +if(MSVC) #add compatibility includes for stdint types - INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) + include_directories(${CMAKE_SOURCE_DIR}/msvc) #compile the sources as C++ due to the lack of complex.h under MSVC - SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX) -ENDIF(MSVC) + set_source_files_properties(${volk_sources} PROPERTIES LANGUAGE CXX) +endif(MSVC) #create the volk runtime library -ADD_LIBRARY(volk SHARED ${volk_sources}) -TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES}) -SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER}) -SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") - -INSTALL(TARGETS volk - LIBRARY DESTINATION lib${LIB_SUFFIX} # .so file - ARCHIVE DESTINATION lib${LIB_SUFFIX} # .lib file - RUNTIME DESTINATION bin # .dll file +add_library(volk SHARED ${volk_sources}) +target_link_libraries(volk ${ORC_LIBRARIES}) +set_target_properties(volk PROPERTIES SOVERSION ${LIBVER}) +set_target_properties(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") + +install(TARGETS volk + LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_runtime" # .so file + ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_devel" # .lib file + RUNTIME DESTINATION bin COMPONENT "volk_runtime" # .dll file ) ######################################################################## # Build the QA test application ######################################################################## -FIND_PACKAGE(Boost COMPONENTS unit_test_framework) +find_package(Boost COMPONENTS unit_test_framework) -IF(Boost_FOUND) +if(Boost_FOUND) -SET_SOURCE_FILES_PROPERTIES( +set_source_files_properties( ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN" ) -INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) -LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) +include_directories(${Boost_INCLUDE_DIRS}) +link_directories(${Boost_LIBRARY_DIRS}) -ADD_EXECUTABLE(test_all +add_executable(test_all ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc ) -TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) -ADD_TEST(qa_volk_test_all test_all) +target_link_libraries(test_all volk ${Boost_LIBRARIES}) +#ADD_TEST(qa_volk_test_all test_all) -ENDIF() +endif() diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 7f86dd78b..9bb515e9f 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -35,7 +35,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } else { float int_max = float(uint64_t(2) << (type.size*8)); if(type.is_signed) int_max /= 2.0; - for(int i=0; i<n; i++) { + for(unsigned int i=0; i<n; i++) { float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max; //man i really don't know how to do this in a more clever way, you have to cast down at some point switch(type.size) { @@ -91,7 +91,7 @@ volk_type_t volk_type_from_string(std::string name) { } //get the data size - int last_size_pos = name.find_last_of("0123456789"); + size_t last_size_pos = name.find_last_of("0123456789"); if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); //will throw if malformed int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1)); @@ -99,7 +99,7 @@ volk_type_t volk_type_from_string(std::string name) { assert(((size % 8) == 0) && (size <= 64) && (size != 0)); type.size = size/8; //in bytes - for(int i=last_size_pos+1; i < name.size(); i++) { + for(size_t i=last_size_pos+1; i < name.size(); i++) { switch (name[i]) { case 'f': type.is_float = true; @@ -202,7 +202,7 @@ template <class t> bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; int print_max_errs = 10; - for(int i=0; i<vlen; i++) { + for(unsigned int i=0; i<vlen; i++) { if(((t *)(in1))[i] < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) { fail=true; @@ -219,7 +219,7 @@ template <class t> bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; - for(int i=0; i<vlen; i++) { + for(unsigned int i=0; i<vlen; i++) { if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) { fail=true; if(print_max_errs-- > 0) { @@ -270,7 +270,7 @@ bool run_volk_tests(struct volk_func_desc desc, //pull the input scalars into their own vector std::vector<volk_type_t> inputsc; - for(int i=0; i<inputsig.size(); i++) { + for(size_t i=0; i<inputsig.size(); i++) { if(inputsig[i].is_scalar) { inputsc.push_back(inputsig[i]); inputsig.erase(inputsig.begin() + i); @@ -284,18 +284,18 @@ bool run_volk_tests(struct volk_func_desc desc, if(!sig.is_scalar) //we don't make buffers for scalars inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1))); } - for(int i=0; i<inbuffs.size(); i++) { + for(size_t i=0; i<inbuffs.size(); i++) { load_random_data(inbuffs[i], inputsig[i], vlen); } //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch std::vector<std::vector<void *> > test_data; - for(int i=0; i<arch_list.size(); i++) { + for(size_t i=0; i<arch_list.size(); i++) { std::vector<void *> arch_buffs; - for(int j=0; j<outputsig.size(); j++) { + for(size_t j=0; j<outputsig.size(); j++) { arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex ? 2 : 1))); } - for(int j=0; j<inputsig.size(); j++) { + for(size_t j=0; j<inputsig.size(); j++) { arch_buffs.push_back(inbuffs[j]); } test_data.push_back(arch_buffs); @@ -308,7 +308,7 @@ bool run_volk_tests(struct volk_func_desc desc, //now run the test clock_t start, end; std::vector<double> profile_times; - for(int i = 0; i < arch_list.size(); i++) { + for(size_t i = 0; i < arch_list.size(); i++) { start = clock(); switch(both_sigs.size()) { @@ -350,8 +350,8 @@ bool run_volk_tests(struct volk_func_desc desc, //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... - int generic_offset=0; - for(int i=0; i<arch_list.size(); i++) + size_t generic_offset=0; + for(size_t i=0; i<arch_list.size(); i++) if(arch_list[i] == "generic") generic_offset=i; //now compare @@ -360,10 +360,10 @@ bool run_volk_tests(struct volk_func_desc desc, bool fail = false; bool fail_global = false; std::vector<bool> arch_results; - for(int i=0; i<arch_list.size(); i++) { + for(size_t i=0; i<arch_list.size(); i++) { fail = false; if(i != generic_offset) { - for(int j=0; j<both_sigs.size(); j++) { + for(size_t j=0; j<both_sigs.size(); j++) { if(both_sigs[j].is_float) { if(both_sigs[j].size == 8) { fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol); @@ -417,7 +417,7 @@ bool run_volk_tests(struct volk_func_desc desc, double best_time = std::numeric_limits<double>::max(); std::string best_arch = "generic"; - for(int i=0; i < arch_list.size(); i++) { + for(size_t i=0; i < arch_list.size(); i++) { if((profile_times[i] < best_time) && arch_results[i]) { best_time = profile_times[i]; best_arch = arch_list[i]; diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c index 9743c51d9..b29d5fd87 100644 --- a/volk/lib/volk_prefs.c +++ b/volk/lib/volk_prefs.c @@ -23,7 +23,7 @@ int load_preferences(struct volk_arch_pref **prefs) { //get the config path get_config_path(path); config_file = fopen(path, "r"); - if(!config_file) return; //no prefs found + if(!config_file) return n_arch_prefs; //no prefs found while(fgets(line, 512, config_file) != NULL) { if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index e10433fd0..4baa078bc 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -5,7 +5,7 @@ #include <string.h> unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { - int i; + unsigned int i; for(i=0; i<n_archs; i++) { if(!strncmp(indices[i], arch_name, 20)) { return i; @@ -17,10 +17,10 @@ unsigned int get_index(const char *indices[], unsigned int n_archs, const char * } unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { - int i; + unsigned int i; unsigned int best_val = 0; static struct volk_arch_pref *volk_arch_prefs; - static int n_arch_prefs = 0; + static unsigned int n_arch_prefs = 0; static int prefs_loaded = 0; if(!prefs_loaded) { n_arch_prefs = load_preferences(&volk_arch_prefs); |