summaryrefslogtreecommitdiff
path: root/volk/lib
diff options
context:
space:
mode:
authorNick Foster2011-05-10 21:52:23 -0700
committerNick Foster2011-05-10 21:52:23 -0700
commite3997ada93a25569a05bbfd615d73d00cee6eca5 (patch)
tree6d9c6b83cf0da7bd1768989f857aab1d2505cb88 /volk/lib
parent796527604c579951fbd5fbc3e5ed41a17b61610e (diff)
downloadgnuradio-e3997ada93a25569a05bbfd615d73d00cee6eca5.tar.gz
gnuradio-e3997ada93a25569a05bbfd615d73d00cee6eca5.tar.bz2
gnuradio-e3997ada93a25569a05bbfd615d73d00cee6eca5.zip
Volk: initial profiling support. Profiling works, reading doesn't yet. Need to add name field to volk arch_defs
Diffstat (limited to 'volk/lib')
-rw-r--r--volk/lib/CMakeLists.txt6
-rw-r--r--volk/lib/qa_utils.cc35
-rw-r--r--volk/lib/qa_utils.h7
-rw-r--r--volk/lib/volk_profile.cc123
-rw-r--r--volk/lib/volk_rank_archs.c59
-rw-r--r--volk/lib/volk_rank_archs.h7
6 files changed, 229 insertions, 8 deletions
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
index dfb1d4219..6198dc0e7 100644
--- a/volk/lib/CMakeLists.txt
+++ b/volk/lib/CMakeLists.txt
@@ -227,4 +227,10 @@ ADD_EXECUTABLE(test_all
TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES})
ADD_TEST(qa_volk_test_all test_all)
+ADD_EXECUTABLE(volk_profile
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
+)
+TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES})
+
ENDIF()
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index fa091ad0d..3eb1da1f1 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -8,6 +8,7 @@
#include <list>
#include <ctime>
#include <cmath>
+#include <limits>
#include <boost/lexical_cast.hpp>
#include <volk/volk.h>
#include <volk/volk_cpu.h>
@@ -240,7 +241,15 @@ public:
private: std::list<std::vector<char> > _mems;
};
-bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
+bool run_volk_tests(struct volk_func_desc desc,
+ void (*manual_func)(),
+ std::string name,
+ float tol,
+ float scalar,
+ int vlen,
+ int iter,
+ std::vector<std::string> *best_arch_vector = 0
+ ) {
std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
//first let's get a list of available architectures for the test
@@ -297,6 +306,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
//now run the test
clock_t start, end;
+ std::vector<double> profile_times;
for(int i = 0; i < arch_list.size(); i++) {
start = clock();
@@ -331,8 +341,12 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
}
end = clock();
- std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
+ double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl;
+
+ profile_times.push_back(arch_time);
}
+
//and now compare each output to the generic output
//first we have to know which output is the generic one, they aren't in order...
int generic_offset=0;
@@ -344,7 +358,9 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
bool fail = false;
bool fail_global = false;
+ std::vector<bool> arch_results;
for(int i=0; i<arch_list.size(); i++) {
+ fail = false;
if(i != generic_offset) {
for(int j=0; j<both_sigs.size(); j++) {
if(both_sigs[j].is_float) {
@@ -395,6 +411,21 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
//fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
}
}
+ arch_results.push_back(!fail);
+ }
+
+ double best_time = std::numeric_limits<double>::max();
+ std::string best_arch = "generic";
+ for(int i=0; i < arch_list.size(); i++) {
+ if((profile_times[i] < best_time) && arch_results[i]) {
+ best_time = profile_times[i];
+ best_arch = arch_list[i];
+ }
+ }
+
+ std::cout << "Best arch: " << best_arch << std::endl;
+ if(best_arch_vector) {
+ best_arch_vector->push_back(name + std::string(" ") + best_arch);
}
return fail_global;
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 304a00533..a1bc1f20c 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -3,6 +3,7 @@
#include <cstdlib>
#include <string>
+#include <vector>
#include <volk/volk.h>
#include <volk/volk_common.h>
@@ -20,10 +21,10 @@ volk_type_t volk_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
-bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int);
-
-#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); }
+bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *);
+#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); }
+#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results)
typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
diff --git a/volk/lib/volk_profile.cc b/volk/lib/volk_profile.cc
new file mode 100644
index 000000000..c4a04abf2
--- /dev/null
+++ b/volk/lib/volk_profile.cc
@@ -0,0 +1,123 @@
+#include "qa_utils.h"
+#include <volk/volk.h>
+#include <volk_rank_archs.h>
+#include <boost/test/unit_test.hpp>
+#include <vector>
+#include <boost/foreach.hpp>
+#include <iostream>
+#include <fstream>
+
+/*
+ * volk_profile entry point.
+ *
+ * Runs VOLK_PROFILE for every enabled kernel; each call appends one
+ * "<function name> <best arch>" line to results. The collected lines are
+ * then written, after a two-line comment header, to the file named by
+ * get_config_path(), and load_preferences() is called to read it back.
+ *
+ * Returns 1 if the config file cannot be opened for writing (for example
+ * because its directory does not exist); otherwise falls off the end of
+ * main, i.e. exit status 0.
+ */
+int main(int argc, char *argv[]) {
+
+    std::vector<std::string> results;
+    char path[512];
+    get_config_path(path);
+    std::string config_path(path);
+
+    //the full kernel list below is currently commented out; only the two
+    //calls that follow the comment block are profiled
+/*
+    //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results);
+    //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results);
+    VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results);
+    VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results);
+    //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results);
+    //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results);
+    VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results);
+    //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results);
+    VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results);
+    //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results);
+    VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results);
+    VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results);
+    VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results);
+    //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results);
+    VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results);
+    VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results);
+    VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results);
+    VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results);
+    VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results);
+    VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results);
+    VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results);
+    VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results);
+    VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results);
+    //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results);
+    VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results);
+    //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results);
+    VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results);
+    VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results);
+    VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results);
+    VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results);
+    VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results);
+    VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results);
+*/
+    VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results);
+    VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results);
+
+    std::ofstream config;
+    std::cout << "filename: " << config_path << std::endl;
+    config.open(config_path.c_str());
+    if(!config.is_open()) {
+        //without this check the profiling results would be dropped silently
+        std::cerr << "could not open " << config_path << " for writing" << std::endl;
+        return 1;
+    }
+
+    config << "\
+#this file is generated by volk_profile.\n\
+#the function name is followed by the preferred architecture.\n\
+";
+
+    //iterate by const reference: no per-line string copy
+    BOOST_FOREACH(const std::string &result, results) {
+        config << result << '\n';
+    }
+    config.close(); //flushes everything written above
+
+    load_preferences();
+}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index 25ad75cda..f505abeb6 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -1,5 +1,60 @@
-#include<volk_rank_archs.h>
-#include<stdio.h>
+#include <volk_rank_archs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(_WIN32)
+#include <Windows.h>
+#endif
+
+/*
+ * Writes the path of the per-user VOLK preference file,
+ * "$HOME/.gnuradio/volk_config", into path as a NUL-terminated string.
+ *
+ * path must point to a buffer of at least 512 bytes, which is what the
+ * callers in volk_profile.cc and load_preferences() provide. If HOME is
+ * not set, or the resulting path would not fit in 512 bytes, path is set
+ * to the empty string so that a later attempt to open it fails cleanly
+ * instead of dereferencing NULL or overrunning the buffer.
+ */
+void get_config_path(char *path) {
+    const char *suffix = "/.gnuradio/volk_config";
+    const size_t path_max = 512; /* size of the buffers the callers pass in */
+    const char *home = getenv("HOME");
+
+    path[0] = '\0';
+    if(home == NULL) return; /* no HOME (e.g. stripped environment) */
+    if(strlen(home) + strlen(suffix) + 1 > path_max) return; /* would overflow */
+
+    strcpy(path, home);
+    strcat(path, suffix);
+}
+
+/*
+ * Design note: VOLK functions are loaded piecemeal, so to avoid reading
+ * the whole config file at startup we should probably build a static
+ * prefs struct that volk_rank_archs can consult with minimal modification.
+ * That makes volk_rank_archs slower and load_preferences more complex, but
+ * load_preferences then need not be exported and volk.h need not be included.
+ * It does mean the function name has to be passed into volk_rank_archs.
+ * The problem is that names do not appear anywhere in the volk function
+ * descriptor, so the descriptor has to be modified to include the name.
+ *
+ * It also means the function name no longer has to be specified separately
+ * in qa_utils.h/.cc; it can be passed in along with the func_desc.
+ *
+ */
+
+/*
+ * Reads the preference file named by get_config_path() and, for now, only
+ * prints and counts the entries it recognises.
+ *
+ * An entry is a line whose first two whitespace-separated tokens are a
+ * function name starting with "volk_" and an architecture name; all other
+ * lines (such as the '#' comment header) are ignored. The parsed values are
+ * not stored anywhere yet.
+ *
+ * The file is parsed at most once per process: after a successful pass the
+ * function returns immediately on later calls. If the file cannot be opened
+ * nothing is recorded, so a later call tries again.
+ */
+void load_preferences(void) {
+    static int prefs_loaded = 0;
+    FILE *config_file;
+    char path[512], line[512], function[256], arch[64];
+    int n_arch_preferences = 0;
+
+    if(prefs_loaded) return;
+
+    //get the config path
+    get_config_path(path);
+    config_file = fopen(path, "r");
+    if(!config_file) return; //no prefs found
+
+    while(fgets(line, sizeof(line), config_file) != NULL) {
+        //the field widths are one less than sizeof(function)/sizeof(arch):
+        //a token longer than that is cut short instead of overflowing the
+        //stack buffers (line itself can hold up to 511 characters)
+        if(sscanf(line, "%255s %63s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+            printf("func: %s, arch: %s\n", function, arch);
+            //we have a function and we have an arch, let's set it
+            n_arch_preferences++;
+        }
+    }
+
+    fclose(config_file);
+
+    printf("Found %d prefs\n", n_arch_preferences);
+    prefs_loaded = 1;
+}
unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) {
int i = 1;
diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h
index 8fa0631ee..37a0fbc46 100644
--- a/volk/lib/volk_rank_archs.h
+++ b/volk/lib/volk_rank_archs.h
@@ -6,7 +6,12 @@ extern "C" {
#endif
unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch);
-
+
+////////////////////////////////////////////////////////////////////////
+//get path to volk_config profiling info
+////////////////////////////////////////////////////////////////////////
+void get_config_path(char *);
+void load_preferences(void); //FIXME DEBUG shouldn't be exported
#ifdef __cplusplus
}