From e3997ada93a25569a05bbfd615d73d00cee6eca5 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 10 May 2011 21:52:23 -0700 Subject: Volk: initial profiling support. Profiling works, reading doesn't yet. Need to add name field to volk arch_defs --- volk/lib/CMakeLists.txt | 6 +++ volk/lib/qa_utils.cc | 35 ++++++++++++- volk/lib/qa_utils.h | 7 +-- volk/lib/volk_profile.cc | 123 +++++++++++++++++++++++++++++++++++++++++++++ volk/lib/volk_rank_archs.c | 59 +++++++++++++++++++++- volk/lib/volk_rank_archs.h | 7 ++- 6 files changed, 229 insertions(+), 8 deletions(-) create mode 100644 volk/lib/volk_profile.cc diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index dfb1d4219..6198dc0e7 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -227,4 +227,10 @@ ADD_EXECUTABLE(test_all TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) ADD_TEST(qa_volk_test_all test_all) +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) + ENDIF() diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index fa091ad0d..3eb1da1f1 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -240,7 +241,15 @@ public: private: std::list > _mems; }; -bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test @@ -297,6 +306,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri //now run the test clock_t start, end; + std::vector profile_times; 
for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -331,8 +341,12 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; @@ -344,7 +358,9 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri bool fail = false; bool fail_global = false; + std::vector arch_results; for(int i=0; i::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 304a00533..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -20,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); } +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector *); +#define 
VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); diff --git a/volk/lib/volk_profile.cc b/volk/lib/volk_profile.cc new file mode 100644 index 000000000..c4a04abf2 --- /dev/null +++ b/volk/lib/volk_profile.cc @@ -0,0 +1,123 @@ +#include "qa_utils.h" +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + + std::vector results; + char path[512]; + get_config_path(path); + std::string config_path(path); + +/* + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); + 
VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); + 
VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 
204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); + VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); + VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); + 
VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); +*/ + VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); + + load_preferences(); +} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 25ad75cda..f505abeb6 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,5 +1,60 @@ -#include -#include +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#endif + +void get_config_path(char *path) { + const char *suffix = "/.gnuradio/volk_config"; + memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); + strcat(path, suffix); +} + +/* + * ok so volk stuff has to be loaded piecemeal, and to avoid reading + * the whole config file in at startup we should probably create a static + * prefs struct that can be read in by rank_archs with minimal modification. + * this makes rank_archs slower and load_preferences more complex, but + * we don't have to export load_preferences and we don't have to include volk.h. + * means we need to pass the name into rank_archs, though + * problem is that names don't appear anywhere in the volk function descriptor. + * so we have to modify things to include the name in the descriptor. 
+ * + * also means you don't have to also spec the fn name in qa_utils.h/c, you can + * pass it in along with the func_desc + * + */ + +void load_preferences(void) { + static int prefs_loaded = 0; + FILE *config_file; + char path[512], line[512], function[256], arch[64]; + + if(prefs_loaded) return; + + int n_arch_preferences = 0; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + printf("func: %s, arch: %s\n", function, arch); + //we have a function and we have an arch, let's set it + n_arch_preferences++; + } + } + + fclose(config_file); + + printf("Found %d prefs\n", n_arch_preferences); + prefs_loaded = 1; +} unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) { int i = 1; diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 8fa0631ee..37a0fbc46 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -6,7 +6,12 @@ extern "C" { #endif unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); - + +//////////////////////////////////////////////////////////////////////// +//get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +void get_config_path(char *); +void load_preferences(void); //FIXME DEBUG shouldn't be exported #ifdef __cplusplus } -- cgit From b50dbc4498842fecd7f0c6adc22f25726f8d27d3 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 11 May 2011 21:45:03 -0700 Subject: Volk: Profiler is in apps/ now. Added name to function info. Going to C++-ify the whole thing. 
--- volk/CMakeLists.txt | 5 ++ volk/apps/CMakeLists.txt | 39 +++++++++++++ volk/apps/volk_profile.cc | 119 ++++++++++++++++++++++++++++++++++++++ volk/gen/make_c.py | 2 +- volk/gen/make_each_machine_c.py | 1 + volk/gen/make_machines_h.py | 1 + volk/lib/CMakeLists.txt | 6 -- volk/lib/volk_profile.cc | 123 ---------------------------------------- volk/lib/volk_rank_archs.c | 15 ++++- volk/lib/volk_rank_archs.h | 8 +-- 10 files changed, 181 insertions(+), 138 deletions(-) create mode 100644 volk/apps/CMakeLists.txt create mode 100644 volk/apps/volk_profile.cc delete mode 100644 volk/lib/volk_profile.cc diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt index ab65f8791..9c95fe6cd 100644 --- a/volk/CMakeLists.txt +++ b/volk/CMakeLists.txt @@ -95,6 +95,11 @@ INSTALL( ######################################################################## ADD_SUBDIRECTORY(lib) +######################################################################## +# And the utility apps +######################################################################## +ADD_SUBDIRECTORY(apps) + ######################################################################## # Print summary ######################################################################## diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt new file mode 100644 index 000000000..752cbc679 --- /dev/null +++ b/volk/apps/CMakeLists.txt @@ -0,0 +1,39 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Setup profiler +######################################################################## +IF(MSVC) + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) +ENDIF(MSVC) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_SOURCE_DIR}/lib + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc + ${CMAKE_SOURCE_DIR}/lib/volk_prefs.cc +) + +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc new file mode 100644 index 000000000..eb7b70887 --- /dev/null +++ b/volk/apps/volk_profile.cc @@ -0,0 +1,119 @@ +#include "qa_utils.h" +#include +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + + std::vector results; + std::string config_path = get_config_path(); +/* + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); +
VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); + 
VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 
204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); + VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); + 
VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); + VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); +*/ + VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); + + load_preferences(); +} diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py index 4e67f31ff..11b614644 100644 --- a/volk/gen/make_c.py +++ b/volk/gen/make_c.py @@ -82,7 +82,7 @@ static unsigned int get_index(const char *indices[], unsigned int n_archs, const for i in range(len(functions)): tempstring += "void get_" + functions[i] + replace_arch.sub("", arched_arglist[i]) + "\n" - tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_arch_defs, get_machine()->%s_n_archs, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i]) + tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_arch_defs, get_machine()->%s_n_archs, get_machine()->%s_name, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i], functions[i]) tempstring += " %s(%s);\n}\n\n" % (functions[i], my_arglist[i]) tempstring += replace_volk.sub("p", functions[i]) + " " + functions[i] + " = &get_" + functions[i] + ";\n\n" tempstring += "void %s_manual%s\n" % (functions[i], arched_arglist[i]) diff --git a/volk/gen/make_each_machine_c.py b/volk/gen/make_each_machine_c.py index a7d3bb752..dfb7011b4 100644 --- a/volk/gen/make_each_machine_c.py +++ b/volk/gen/make_each_machine_c.py @@ -41,6 +41,7 @@ def _make_each_machine_struct(machine_name, archs, functions, 
fcountlist, taglis #fill in the description for each function for i in range(len(functions)): + tempstring += " \"%s\",\n"%functions[i] tempstring += " {%s},\n"%(', '.join(['"%s"'%tag for tag in machine_taglists[i]])) tempstring += " {%s},\n"%(', '.join([' | '.join(['(1 << LV_%s)'%fc for fc in fcount]) for fcount in machine_fcountlists[i]])) tempstring += " {%s},\n"%(', '.join(['%s_%s'%(functions[i], tag) for tag in machine_taglists[i]])) diff --git a/volk/gen/make_machines_h.py b/volk/gen/make_machines_h.py index 09ada3e0d..563de18a6 100644 --- a/volk/gen/make_machines_h.py +++ b/volk/gen/make_machines_h.py @@ -35,6 +35,7 @@ struct volk_machine { const char *name; """ for function in functions: + tempstring += " const char *%s_name;\n"%function tempstring += " const char *%s_indices[%d];\n"%(function, len(archs)) tempstring += " const int %s_arch_defs[%d];\n"%(function, len(archs)) tempstring += " const %s %s_archs[%d];\n"%(replace_volk.sub("p", function), function, len(archs)) diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index 6198dc0e7..dfb1d4219 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -227,10 +227,4 @@ ADD_EXECUTABLE(test_all TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) ADD_TEST(qa_volk_test_all test_all) -ADD_EXECUTABLE(volk_profile - ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc - ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc -) -TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) - ENDIF() diff --git a/volk/lib/volk_profile.cc b/volk/lib/volk_profile.cc deleted file mode 100644 index c4a04abf2..000000000 --- a/volk/lib/volk_profile.cc +++ /dev/null @@ -1,123 +0,0 @@ -#include "qa_utils.h" -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char *argv[]) { - - std::vector results; - char path[512]; - get_config_path(path); - std::string config_path(path); - -/* - //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); - 
//VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); - VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); - VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); - VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); - VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); - //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); - //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); - VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); - //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); - 
VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); - VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); - //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); - VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); - VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); - 
VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); - //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); - VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); - VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); - VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); - VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); - VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); - VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); - VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); - VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); - VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); - VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); - //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, 
&results); - VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); - //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); - VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); - VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); - VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); -*/ - VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); - VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); - - std::ofstream config; - std::cout << "filename: " << config_path << std::endl; - config.open(config_path.c_str()); - - config << "\ -#this file is generated by volk_profile.\n\ -#the function name is followed by the preferred architecture.\n\ -"; - - BOOST_FOREACH(std::string result, results) { - config << result << std::endl; - } - config.close(); - - load_preferences(); -} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index f505abeb6..14f1789da 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -7,6 +7,8 @@ #include #endif +//this should be used by the profiler app to find the path as well +//possibly all this stuff should go in a separate volk_prefs.cc void get_config_path(char *path) { const char *suffix = "/.gnuradio/volk_config"; memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); @@ -25,8 +27,19 @@ void get_config_path(char *path) { * * also means you don't 
have to also spec the fn name in qa_utils.h/c, you can * pass it in along with the func_desc + * + * your prefs reader should also have a prefs writer which takes a vector of prefs and writes them + * then your profiler can just write the prefs by passing that out * */ + +struct volk_arch_pref { + const char *name; + const char *arch; +}; + +//if we end up with more this will have to use realloc +struct volk_arch_pref volk_arch_prefs[400]; void load_preferences(void) { static int prefs_loaded = 0; @@ -56,7 +69,7 @@ void load_preferences(void) { prefs_loaded = 1; } -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) { +unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { int i = 1; unsigned int best_val = 0; for(; i < n_archs; ++i) { diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 37a0fbc46..ba248aa59 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,13 +5,7 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); - -//////////////////////////////////////////////////////////////////////// -//get path to volk_config profiling info -//////////////////////////////////////////////////////////////////////// -void get_config_path(char *); -void load_preferences(void); //FIXME DEBUG shouldn't be exported +unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } -- cgit From 30fdc38d20d4e38908059b6e351c550de5741621 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 14:21:17 -0700 Subject: Volk: profiling works. loads prefs on init. volk_rank_archs looks in prefs first. 
--- volk/apps/CMakeLists.txt | 1 - volk/apps/volk_profile.cc | 11 +++--- volk/gen/make_c.py | 14 +------ volk/lib/CMakeLists.txt | 1 + volk/lib/volk_rank_archs.c | 92 ++++++++++++++-------------------------------- volk/lib/volk_rank_archs.h | 3 +- 6 files changed, 37 insertions(+), 85 deletions(-) diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt index 752cbc679..a0bf7e900 100644 --- a/volk/apps/CMakeLists.txt +++ b/volk/apps/CMakeLists.txt @@ -33,7 +33,6 @@ INCLUDE_DIRECTORIES( ADD_EXECUTABLE(volk_profile ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc - ${CMAKE_SOURCE_DIR}/lib/volk_prefs.cc ) TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index eb7b70887..49e6db3fc 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -1,6 +1,8 @@ #include "qa_utils.h" +extern "C" { #include #include +} #include #include #include @@ -9,8 +11,10 @@ int main(int argc, char *argv[]) { std::vector results; - std::string config_path = get_config_path(); -/* + char path[256]; + get_config_path(path); + std::string config_path(path); + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); @@ -97,7 +101,6 @@ int main(int argc, char *argv[]) { VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); -*/ VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); @@ -114,6 +117,4 @@ int main(int argc, char *argv[]) { config << result << std::endl; } config.close(); - - load_preferences(); 
} diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py index 11b614644..fa08bbb0e 100644 --- a/volk/gen/make_c.py +++ b/volk/gen/make_c.py @@ -66,23 +66,11 @@ struct volk_machine *get_machine(void) { } } -static unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { - int i; - for(i=0; i +#include #include #include #include -#if defined(_WIN32) -#include -#endif - -//this should be used by the profiler app to find the path as well -//possibly all this stuff should go in a separate volk_prefs.cc -void get_config_path(char *path) { - const char *suffix = "/.gnuradio/volk_config"; - memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); - strcat(path, suffix); -} - -/* - * ok so volk stuff has to be loaded piecemeal, and to avoid reading - * the whole config file in at startup we should probably create a static - * prefs struct that can be read in by rank_archs with minimal modification. - * this makes rank_archs slower and load_preferences more complex, but - * we don't have to export load_preferences and we don't have to include volk.h. - * means we need to pass the name into rank_archs, though - * problem is that names don't appear anywhere in the volk function descriptor. - * so we have to modify things to include the name in the descriptor. 
- * - * also means you don't have to also spec the fn name in qa_utils.h/c, you can - * pass it in along with the func_desc - * - * your prefs reader should also have a prefs writer which takes a vector of prefs and writes them - * then your profiler can just write the prefs by passing that out - * - */ - -struct volk_arch_pref { - const char *name; - const char *arch; -}; - -//if we end up with more this will have to use realloc -struct volk_arch_pref volk_arch_prefs[400]; - -void load_preferences(void) { - static int prefs_loaded = 0; - FILE *config_file; - char path[512], line[512], function[256], arch[64]; - - if(prefs_loaded) return; - - int n_arch_preferences = 0; - - //get the config path - get_config_path(path); - config_file = fopen(path, "r"); - if(!config_file) return; //no prefs found - - while(fgets(line, 512, config_file) != NULL) { - if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { - printf("func: %s, arch: %s\n", function, arch); - //we have a function and we have an arch, let's set it - n_arch_preferences++; +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { + int i; + for(i=0; i arch_defs[best_val + 1]) ? 
i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index ba248aa59..546240d2c 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,7 +5,8 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } -- cgit From c21132e07100c62182a27a8e282cb72463dd2963 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 14:50:48 -0700 Subject: Volk: actually return the preferred arch --- volk/lib/volk_rank_archs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 1b75af8f4..e10433fd0 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -30,7 +30,7 @@ unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsign //now look for the function name in the prefs list for(i=0; i < n_arch_prefs; i++) { if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it - best_val = get_index(indices, n_archs, volk_arch_prefs[i].arch); + return get_index(indices, n_archs, volk_arch_prefs[i].arch); } } -- cgit From a1b9b5c16c53bedfe8ebab39055a36dee387a9a4 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 15:07:31 -0700 Subject: Volk: forgot to add prefs.c/h to git... 
--- volk/include/volk/volk_prefs.h | 19 ++++++++++++++++ volk/lib/volk_prefs.c | 49 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 volk/include/volk/volk_prefs.h create mode 100644 volk/lib/volk_prefs.c diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h new file mode 100644 index 000000000..77bcb709b --- /dev/null +++ b/volk/include/volk/volk_prefs.h @@ -0,0 +1,19 @@ +#ifndef INCLUDED_VOLK_PREFS_H +#define INCLUDED_VOLK_PREFS_H + +struct volk_arch_pref { + char name[128]; + char arch[32]; +}; + +//////////////////////////////////////////////////////////////////////// +// get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +void get_config_path(char *); + +//////////////////////////////////////////////////////////////////////// +// load prefs into global prefs struct +//////////////////////////////////////////////////////////////////////// +int load_preferences(struct volk_arch_pref **); + +#endif //INCLUDED_VOLK_PREFS_H diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c new file mode 100644 index 000000000..bd15c130e --- /dev/null +++ b/volk/lib/volk_prefs.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +//#if defined(_WIN32) +//#include +//#endif + +void get_config_path(char *path) { + const char *suffix = "/.gnuradio/volk_config"; + strcpy(path, getenv("HOME")); + strcat(path, suffix); +} + +//passing by reference in C can suck my balls +int load_preferences(struct volk_arch_pref **prefs) { + FILE *config_file; + char path[512], line[512], function[128], arch[32]; + int n_arch_prefs = 0; + struct volk_arch_pref *t_pref; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + n_arch_prefs++; + } + } + + 
//now allocate the memory required for volk_arch_prefs + (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref)); + t_pref = (*prefs); + + //reset the file pointer and write the prefs into volk_arch_prefs + rewind(config_file); + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + strncpy(t_pref->name, function, 128); + strncpy(t_pref->arch, arch, 32); + t_pref++; + } + } + fclose(config_file); + return n_arch_prefs; +} -- cgit From 644d155a1f5dbc10985c2e0a7c0df4d7680cbf31 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 15:23:35 -0700 Subject: Volk: fix volk.pc.in --- volk/volk.pc.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/volk/volk.pc.in b/volk/volk.pc.in index 85425ba64..58e976786 100644 --- a/volk/volk.pc.in +++ b/volk/volk.pc.in @@ -5,11 +5,10 @@ includedir=@includedir@ LV_CXXFLAGS=@LV_CXXFLAGS@ - Name: volk -Description: VOLK.. 
Vector Optimized Library of Kernels +Description: VOLK: Vector Optimized Library of Kernels Requires: Version: @VERSION@ -Libs: -lvolk -lvolk_runtime @LV_ORC_PKGCONFIG@ +Libs: -lvolk Cflags: -I${includedir} ${LV_CXXFLAGS} -- cgit From 256304ee92f49b536358252693a2083f211e1e37 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 15:27:59 -0700 Subject: Volk: fixed pkgconfig install --- volk/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt index 9c95fe6cd..5a3da11d4 100644 --- a/volk/CMakeLists.txt +++ b/volk/CMakeLists.txt @@ -76,8 +76,8 @@ CONFIGURE_FILE( ${CMAKE_CURRENT_BINARY_DIR}/volk.pc @ONLY) -INSTALL_FILES( - ${CMAKE_CURRENT_BINARY_DIR}/volk.pc +INSTALL( + FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc DESTINATION lib${LIB_SUFFIX}/pkgconfig ) -- cgit From b0e781a55387e02ef8126219ccfe8b3c48a838f5 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 15:39:56 -0700 Subject: Volk: move configuration into ~/.volk instead of ~/.gnuradio, add ability to create dir in profiler if not exist --- volk/apps/volk_profile.cc | 26 +++++++++++++++++++++----- volk/lib/volk_prefs.c | 2 +- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc index 49e6db3fc..3fe227400 100644 --- a/volk/apps/volk_profile.cc +++ b/volk/apps/volk_profile.cc @@ -7,14 +7,13 @@ extern "C" { #include #include #include +#include +#include int main(int argc, char *argv[]) { - std::vector results; - char path[256]; - get_config_path(path); - std::string config_path(path); - + std::vector results; +/* //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); @@ -101,12 +100,29 @@ int main(int argc, char *argv[]) { 
VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); + */ VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + char path[256]; + get_config_path(path); + std::string config_path(path); std::ofstream config; std::cout << "filename: " << config_path << std::endl; config.open(config_path.c_str()); + if(!config.is_open()) { //either we don't have write access or we don't have the dir yet + std::string dir(getenv("HOME")); + dir += "/.volk"; + if(mkdir(dir.c_str(), 0777) == -1) { + std::cout << "Error creating directory " << dir << std::endl; + return -1; + } + config.open(config_path.c_str()); + if(!config.is_open()) { + std::cout << "Error opening file " << config_path << std::endl; + return -1; + } + } config << "\ #this file is generated by volk_profile.\n\ diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c index bd15c130e..ebfe3bc40 100644 --- a/volk/lib/volk_prefs.c +++ b/volk/lib/volk_prefs.c @@ -8,7 +8,7 @@ //#endif void get_config_path(char *path) { - const char *suffix = "/.gnuradio/volk_config"; + const char *suffix = "/.volk/volk_config"; strcpy(path, getenv("HOME")); strcat(path, suffix); } -- cgit