diff options
-rw-r--r-- | volk/lib/CMakeLists.txt | 6 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 35 | ||||
-rw-r--r-- | volk/lib/qa_utils.h | 7 | ||||
-rw-r--r-- | volk/lib/volk_profile.cc | 123 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 59 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.h | 7 |
6 files changed, 229 insertions, 8 deletions
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index dfb1d4219..6198dc0e7 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -227,4 +227,10 @@ ADD_EXECUTABLE(test_all TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) ADD_TEST(qa_volk_test_all test_all) +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) + ENDIF() diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index fa091ad0d..3eb1da1f1 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -8,6 +8,7 @@ #include <list> #include <ctime> #include <cmath> +#include <limits> #include <boost/lexical_cast.hpp> #include <volk/volk.h> #include <volk/volk_cpu.h> @@ -240,7 +241,15 @@ public: private: std::list<std::vector<char> > _mems; }; -bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector<std::string> *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test @@ -297,6 +306,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri //now run the test clock_t start, end; + std::vector<double> profile_times; for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -331,8 +341,12 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; @@ -344,7 +358,9 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri bool fail = false; bool fail_global = false; + std::vector<bool> arch_results; for(int i=0; i<arch_list.size(); i++) { + fail = false; if(i != generic_offset) { for(int j=0; j<both_sigs.size(); j++) { if(both_sigs[j].is_float) { @@ -395,6 +411,21 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1)); } } + arch_results.push_back(!fail); + } + + double best_time = std::numeric_limits<double>::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 304a00533..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,7 @@ #include <cstdlib> #include <string> +#include <vector> #include <volk/volk.h> #include <volk/volk_common.h> @@ -20,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); } +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *); +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); diff --git a/volk/lib/volk_profile.cc b/volk/lib/volk_profile.cc new file mode 100644 index 000000000..c4a04abf2 --- /dev/null +++ b/volk/lib/volk_profile.cc @@ -0,0 +1,123 @@ +#include "qa_utils.h" +#include <volk/volk.h> +#include <volk_rank_archs.h> +#include <boost/test/unit_test.hpp> +#include <vector> +#include <boost/foreach.hpp> +#include <iostream> +#include <fstream> + +int main(int argc, char *argv[]) { + + std::vector<std::string> results; + char path[512]; + get_config_path(path); + std::string config_path(path); + +/* + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); + VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); + VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); + VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); +*/ + VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); + + load_preferences(); +} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 25ad75cda..f505abeb6 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,5 +1,60 @@ -#include<volk_rank_archs.h> -#include<stdio.h> +#include <volk_rank_archs.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if defined(_WIN32) +#include <Windows.h> +#endif + +void get_config_path(char *path) { + const char *suffix = "/.gnuradio/volk_config"; + memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); + strcat(path, suffix); +} + +/* + * ok so volk stuff has to be loaded piecemeal, and to avoid reading + * the whole config file in at startup we should probably create a static + * prefs struct that can be read in by rank_archs with minimal modification. + * this makes rank_archs slower and load_preferences more complex, but + * we don't have to export load_preferences and we don't have to include volk.h. + * means we need to pass the name into rank_archs, though + * problem is that names don't appear anywhere in the volk function descriptor. + * so we have to modify things to include the name in the descriptor. + * + * also means you don't have to also spec the fn name in qa_utils.h/c, you can + * pass it in along with the func_desc + * + */ + +void load_preferences(void) { + static int prefs_loaded = 0; + FILE *config_file; + char path[512], line[512], function[256], arch[64]; + + if(prefs_loaded) return; + + int n_arch_preferences = 0; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + printf("func: %s, arch: %s\n", function, arch); + //we have a function and we have an arch, let's set it + n_arch_preferences++; + } + } + + fclose(config_file); + + printf("Found %d prefs\n", n_arch_preferences); + prefs_loaded = 1; +} unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) { int i = 1; diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 8fa0631ee..37a0fbc46 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -6,7 +6,12 @@ extern "C" { #endif unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); - + +//////////////////////////////////////////////////////////////////////// +//get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +void get_config_path(char *); +void load_preferences(void); //FIXME DEBUG shouldn't be exported #ifdef __cplusplus } |