diff options
-rw-r--r-- | volk/CMakeLists.txt | 9 | ||||
-rw-r--r-- | volk/apps/CMakeLists.txt | 38 | ||||
-rw-r--r-- | volk/apps/volk_profile.cc | 136 | ||||
-rw-r--r-- | volk/gen/make_c.py | 14 | ||||
-rw-r--r-- | volk/gen/make_each_machine_c.py | 1 | ||||
-rw-r--r-- | volk/gen/make_machines_h.py | 1 | ||||
-rw-r--r-- | volk/include/volk/volk_prefs.h | 19 | ||||
-rw-r--r-- | volk/lib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 35 | ||||
-rw-r--r-- | volk/lib/qa_utils.h | 7 | ||||
-rw-r--r-- | volk/lib/volk_prefs.c | 49 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 40 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.h | 4 | ||||
-rw-r--r-- | volk/volk.pc.in | 5 |
14 files changed, 329 insertions, 30 deletions
diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt index 51b999eee..320efa7b4 100644 --- a/volk/CMakeLists.txt +++ b/volk/CMakeLists.txt @@ -52,8 +52,8 @@ CONFIGURE_FILE( ${CMAKE_CURRENT_BINARY_DIR}/volk.pc @ONLY) -INSTALL_FILES( - ${CMAKE_CURRENT_BINARY_DIR}/volk.pc +INSTALL( + FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc DESTINATION lib${LIB_SUFFIX}/pkgconfig ) @@ -72,6 +72,11 @@ INSTALL( ADD_SUBDIRECTORY(lib) ######################################################################## +# And the utility apps +######################################################################## +ADD_SUBDIRECTORY(apps) + +######################################################################## # Print summary ######################################################################## MESSAGE(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}") diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt new file mode 100644 index 000000000..a0bf7e900 --- /dev/null +++ b/volk/apps/CMakeLists.txt @@ -0,0 +1,38 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Setup profiler +######################################################################## +IF(MSVC) + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) +ENDIF(MSVC) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_SOURCE_DIR}/lib + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc +) + +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc new file mode 100644 index 000000000..3fe227400 --- /dev/null +++ b/volk/apps/volk_profile.cc @@ -0,0 +1,136 @@ +#include "qa_utils.h" +extern "C" { +#include <volk/volk.h> +#include <volk/volk_prefs.h> +} +#include <vector> +#include <boost/foreach.hpp> +#include <iostream> +#include <fstream> +#include <sys/stat.h> +#include <sys/types.h> + +int main(int argc, char *argv[]) { + + std::vector<std::string> results; +/* + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); + VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); + VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); + VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); + */ + VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + char path[256]; + get_config_path(path); + std::string config_path(path); + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + if(!config.is_open()) { //either we don't have write access or we don't have the dir yet + std::string dir(getenv("HOME")); + dir += "/.volk"; + if(mkdir(dir.c_str(), 0777) == -1) { + std::cout << "Error creating directory " << dir << std::endl; + return -1; + } + config.open(config_path.c_str()); + if(!config.is_open()) { + std::cout << "Error opening file " << config_path << std::endl; + return -1; + } + } + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); +} diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py index 4e67f31ff..fa08bbb0e 100644 --- a/volk/gen/make_c.py +++ b/volk/gen/make_c.py @@ -66,23 +66,11 @@ struct volk_machine *get_machine(void) { } } -static unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { - int i; - for(i=0; i<n_archs; i++) { - if(!strncmp(indices[i], arch_name, 20)) { - return i; - } - } - //something terrible should happen here - printf("Volk warning: no arch found, returning generic impl\n"); - return get_index(indices, n_archs, "generic"); //but we'll fake it for now -} - """ for i in range(len(functions)): tempstring += "void get_" + functions[i] + replace_arch.sub("", arched_arglist[i]) + "\n" - tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_arch_defs, get_machine()->%s_n_archs, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i]) + tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs, get_machine()->%s_name, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i], functions[i], functions[i]) tempstring += " %s(%s);\n}\n\n" % (functions[i], my_arglist[i]) tempstring += replace_volk.sub("p", functions[i]) + " " + functions[i] + " = &get_" + functions[i] + ";\n\n" tempstring += "void %s_manual%s\n" % (functions[i], arched_arglist[i]) diff --git a/volk/gen/make_each_machine_c.py b/volk/gen/make_each_machine_c.py index a7d3bb752..dfb7011b4 100644 --- a/volk/gen/make_each_machine_c.py +++ b/volk/gen/make_each_machine_c.py @@ -41,6 +41,7 @@ def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglis #fill in the description for each function for i in range(len(functions)): + tempstring += " \"%s\",\n"%functions[i] tempstring += " {%s},\n"%(', '.join(['"%s"'%tag for tag in machine_taglists[i]])) tempstring += " {%s},\n"%(', '.join([' | '.join(['(1 << LV_%s)'%fc for fc in fcount]) for fcount in machine_fcountlists[i]])) tempstring += " {%s},\n"%(', '.join(['%s_%s'%(functions[i], tag) for tag in machine_taglists[i]])) diff --git a/volk/gen/make_machines_h.py b/volk/gen/make_machines_h.py index 09ada3e0d..563de18a6 100644 --- a/volk/gen/make_machines_h.py +++ b/volk/gen/make_machines_h.py @@ -35,6 +35,7 @@ struct volk_machine { const char *name; """ for function in functions: + tempstring += " const char *%s_name;\n"%function tempstring += " const char *%s_indices[%d];\n"%(function, len(archs)) tempstring += " const int %s_arch_defs[%d];\n"%(function, len(archs)) tempstring += " const %s %s_archs[%d];\n"%(replace_volk.sub("p", function), function, len(archs)) diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h new file mode 100644 index 000000000..77bcb709b --- /dev/null +++ b/volk/include/volk/volk_prefs.h @@ -0,0 +1,19 @@ +#ifndef INCLUDED_VOLK_PREFS_H +#define INCLUDED_VOLK_PREFS_H + +struct volk_arch_pref { + char name[128]; + char arch[32]; +}; + +//////////////////////////////////////////////////////////////////////// +// get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +void get_config_path(char *); + +//////////////////////////////////////////////////////////////////////// +// load prefs into global prefs struct +//////////////////////////////////////////////////////////////////////// +int load_preferences(struct volk_arch_pref **); + +#endif //INCLUDED_VOLK_PREFS_H diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index b0d60781f..e8fe5cc58 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -200,6 +200,7 @@ INCLUDE_DIRECTORIES( ) LIST(APPEND volk_sources + ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c ${volk_gen_sources} ) diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index fa091ad0d..3eb1da1f1 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -8,6 +8,7 @@ #include <list> #include <ctime> #include <cmath> +#include <limits> #include <boost/lexical_cast.hpp> #include <volk/volk.h> #include <volk/volk_cpu.h> @@ -240,7 +241,15 @@ public: private: std::list<std::vector<char> > _mems; }; -bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector<std::string> *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test @@ -297,6 +306,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri //now run the test clock_t start, end; + std::vector<double> profile_times; for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -331,8 +341,12 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; @@ -344,7 +358,9 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri bool fail = false; bool fail_global = false; + std::vector<bool> arch_results; for(int i=0; i<arch_list.size(); i++) { + fail = false; if(i != generic_offset) { for(int j=0; j<both_sigs.size(); j++) { if(both_sigs[j].is_float) { @@ -395,6 +411,21 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1)); } } + arch_results.push_back(!fail); + } + + double best_time = std::numeric_limits<double>::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 304a00533..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,7 @@ #include <cstdlib> #include <string> +#include <vector> #include <volk/volk.h> #include <volk/volk_common.h> @@ -20,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); } +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *); +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c new file mode 100644 index 000000000..ebfe3bc40 --- /dev/null +++ b/volk/lib/volk_prefs.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <volk/volk_prefs.h> + +//#if defined(_WIN32) +//#include <Windows.h> +//#endif + +void get_config_path(char *path) { + const char *suffix = "/.volk/volk_config"; + strcpy(path, getenv("HOME")); + strcat(path, suffix); +} + +//passing by reference in C can suck my balls +int load_preferences(struct volk_arch_pref **prefs) { + FILE *config_file; + char path[512], line[512], function[128], arch[32]; + int n_arch_prefs = 0; + struct volk_arch_pref *t_pref; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + n_arch_prefs++; + } + } + + //now allocate the memory required for volk_arch_prefs + (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref)); + t_pref = (*prefs); + + //reset the file pointer and write the prefs into volk_arch_prefs + rewind(config_file); + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + strncpy(t_pref->name, function, 128); + strncpy(t_pref->arch, arch, 32); + t_pref++; + } + } + fclose(config_file); + return n_arch_prefs; +} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 25ad75cda..e10433fd0 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,10 +1,40 @@ -#include<volk_rank_archs.h> -#include<stdio.h> +#include <volk_rank_archs.h> +#include <volk/volk_prefs.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) { - int i = 1; +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { + int i; + for(i=0; i<n_archs; i++) { + if(!strncmp(indices[i], arch_name, 20)) { + return i; + } + } + //something terrible should happen here + printf("Volk warning: no arch found, returning generic impl\n"); + return get_index(indices, n_archs, "generic"); //but we'll fake it for now +} + +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { + int i; unsigned int best_val = 0; - for(; i < n_archs; ++i) { + static struct volk_arch_pref *volk_arch_prefs; + static int n_arch_prefs = 0; + static int prefs_loaded = 0; + if(!prefs_loaded) { + n_arch_prefs = load_preferences(&volk_arch_prefs); + prefs_loaded = 1; + } + + //now look for the function name in the prefs list + for(i=0; i < n_arch_prefs; i++) { + if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it + return get_index(indices, n_archs, volk_arch_prefs[i].arch); + } + } + + for(i=1; i < n_archs; ++i) { if((arch_defs[i]&(!arch)) == 0) { best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 8fa0631ee..546240d2c 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,8 +5,8 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); - +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } diff --git a/volk/volk.pc.in b/volk/volk.pc.in index 85425ba64..58e976786 100644 --- a/volk/volk.pc.in +++ b/volk/volk.pc.in @@ -5,11 +5,10 @@ includedir=@includedir@ LV_CXXFLAGS=@LV_CXXFLAGS@ - Name: volk -Description: VOLK.. Vector Optimized Library of Kernels +Description: VOLK: Vector Optimized Library of Kernels Requires: Version: @VERSION@ -Libs: -lvolk -lvolk_runtime @LV_ORC_PKGCONFIG@ +Libs: -lvolk Cflags: -I${includedir} ${LV_CXXFLAGS} |