summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- volk/CMakeLists.txt | 9
-rw-r--r-- volk/apps/CMakeLists.txt | 38
-rw-r--r-- volk/apps/volk_profile.cc | 136
-rw-r--r-- volk/gen/make_c.py | 14
-rw-r--r-- volk/gen/make_each_machine_c.py | 1
-rw-r--r-- volk/gen/make_machines_h.py | 1
-rw-r--r-- volk/include/volk/volk_prefs.h | 19
-rw-r--r-- volk/lib/CMakeLists.txt | 1
-rw-r--r-- volk/lib/qa_utils.cc | 35
-rw-r--r-- volk/lib/qa_utils.h | 7
-rw-r--r-- volk/lib/volk_prefs.c | 49
-rw-r--r-- volk/lib/volk_rank_archs.c | 40
-rw-r--r-- volk/lib/volk_rank_archs.h | 4
-rw-r--r-- volk/volk.pc.in | 5
14 files changed, 329 insertions, 30 deletions
diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt
index 51b999eee..320efa7b4 100644
--- a/volk/CMakeLists.txt
+++ b/volk/CMakeLists.txt
@@ -52,8 +52,8 @@ CONFIGURE_FILE(
${CMAKE_CURRENT_BINARY_DIR}/volk.pc
@ONLY)
-INSTALL_FILES(
- ${CMAKE_CURRENT_BINARY_DIR}/volk.pc
+INSTALL(
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc
DESTINATION lib${LIB_SUFFIX}/pkgconfig
)
@@ -72,6 +72,11 @@ INSTALL(
ADD_SUBDIRECTORY(lib)
########################################################################
+# And the utility apps
+########################################################################
+ADD_SUBDIRECTORY(apps)
+
+########################################################################
# Print summary
########################################################################
MESSAGE(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}")
diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt
new file mode 100644
index 000000000..a0bf7e900
--- /dev/null
+++ b/volk/apps/CMakeLists.txt
@@ -0,0 +1,38 @@
+#
+# Copyright 2011 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# Setup profiler
+########################################################################
+IF(MSVC)
+ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc)
+ENDIF(MSVC)
+
+# lib/ is on the include path because volk_profile.cc includes "qa_utils.h",
+# which lives next to the library sources rather than under include/.
+INCLUDE_DIRECTORIES(
+ ${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_BINARY_DIR}/include
+ ${CMAKE_SOURCE_DIR}/lib
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+# qa_utils.cc is listed as a source of the executable itself, so the profiler
+# gets run_volk_tests() compiled in alongside volk_profile.cc.
+ADD_EXECUTABLE(volk_profile
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc
+ ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
+)
+
+TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES})
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
new file mode 100644
index 000000000..3fe227400
--- /dev/null
+++ b/volk/apps/volk_profile.cc
@@ -0,0 +1,136 @@
+#include "qa_utils.h"
+extern "C" {
+#include <volk/volk.h>
+#include <volk/volk_prefs.h>
+}
+#include <cerrno>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <boost/foreach.hpp>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// volk_profile entry point.
+//
+// Runs every kernel enabled below through VOLK_PROFILE, which appends one
+// "<function name> <best arch>" string per kernel to `results`, then writes
+// those strings to the per-user config file reported by get_config_path().
+// If the file cannot be opened, the $HOME/.volk directory is created and the
+// open is retried once.
+//
+// Returns 0 on success and -1 if the config file could not be written.
+int main(int argc, char *argv[]) {
+
+    std::vector<std::string> results;
+/*
+ //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results);
+ //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results);
+ VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results);
+ VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results);
+ //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results);
+ //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results);
+ VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results);
+ VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results);
+ //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results);
+ VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results);
+ VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results);
+ VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results);
+ //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results);
+ VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results);
+ VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results);
+ VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results);
+ VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results);
+ //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results);
+ VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results);
+ VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results);
+ VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results);
+ VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results);
+ VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results);
+ VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results);
+ VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results);
+ VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results);
+ VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results);
+ VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results);
+ VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results);
+ VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results);
+ //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results);
+ VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results);
+ //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results);
+ VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results);
+ VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results);
+ VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results);
+ VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results);
+ VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results);
+ VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results);
+ VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results);
+ VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results);
+ */
+    VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results);
+    VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results);
+
+    // Both get_config_path() and the directory fallback below depend on $HOME.
+    // getenv() returns NULL when it is unset, which must not reach strcpy() or
+    // the std::string constructor.
+    const char *home = getenv("HOME");
+    if(home == NULL) {
+        std::cout << "Error: HOME is not set, cannot locate the volk config file" << std::endl;
+        return -1;
+    }
+    const std::string home_dir(home);
+
+    char path[256];
+    // get_config_path() appends "/.volk/volk_config" to $HOME and is not told
+    // how big the buffer is, so make sure the result (plus NUL) fits first.
+    const std::string config_suffix("/.volk/volk_config");
+    if(home_dir.size() + config_suffix.size() >= sizeof(path)) {
+        std::cout << "Error: HOME is too long to build the volk config path" << std::endl;
+        return -1;
+    }
+    get_config_path(path);
+    std::string config_path(path);
+    std::ofstream config;
+    std::cout << "filename: " << config_path << std::endl;
+    config.open(config_path.c_str());
+    if(!config.is_open()) { //either we don't have write access or we don't have the dir yet
+        std::string dir(home_dir);
+        dir += "/.volk";
+        // An already existing directory is not an error; in that case the
+        // retry below reports the real problem (e.g. no write permission).
+        if(mkdir(dir.c_str(), 0777) == -1 && errno != EEXIST) {
+            std::cout << "Error creating directory " << dir << std::endl;
+            return -1;
+        }
+        config.clear(); // drop the failbit left behind by the first open()
+        config.open(config_path.c_str());
+        if(!config.is_open()) {
+            std::cout << "Error opening file " << config_path << std::endl;
+            return -1;
+        }
+    }
+
+    config << "\
+#this file is generated by volk_profile.\n\
+#the function name is followed by the preferred architecture.\n\
+";
+
+    BOOST_FOREACH(const std::string &result, results) {
+        config << result << '\n';
+    }
+    config.close();
+    // close() flushes; a failure here means the preferences were not saved.
+    if(config.fail()) {
+        std::cout << "Error writing file " << config_path << std::endl;
+        return -1;
+    }
+    return 0;
+}
diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py
index 4e67f31ff..fa08bbb0e 100644
--- a/volk/gen/make_c.py
+++ b/volk/gen/make_c.py
@@ -66,23 +66,11 @@ struct volk_machine *get_machine(void) {
}
}
-static unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) {
- int i;
- for(i=0; i<n_archs; i++) {
- if(!strncmp(indices[i], arch_name, 20)) {
- return i;
- }
- }
- //something terrible should happen here
- printf("Volk warning: no arch found, returning generic impl\n");
- return get_index(indices, n_archs, "generic"); //but we'll fake it for now
-}
-
"""
for i in range(len(functions)):
tempstring += "void get_" + functions[i] + replace_arch.sub("", arched_arglist[i]) + "\n"
- tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_arch_defs, get_machine()->%s_n_archs, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i])
+ tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs, get_machine()->%s_name, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i], functions[i], functions[i])
tempstring += " %s(%s);\n}\n\n" % (functions[i], my_arglist[i])
tempstring += replace_volk.sub("p", functions[i]) + " " + functions[i] + " = &get_" + functions[i] + ";\n\n"
tempstring += "void %s_manual%s\n" % (functions[i], arched_arglist[i])
diff --git a/volk/gen/make_each_machine_c.py b/volk/gen/make_each_machine_c.py
index a7d3bb752..dfb7011b4 100644
--- a/volk/gen/make_each_machine_c.py
+++ b/volk/gen/make_each_machine_c.py
@@ -41,6 +41,7 @@ def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglis
#fill in the description for each function
for i in range(len(functions)):
+ tempstring += " \"%s\",\n"%functions[i]
tempstring += " {%s},\n"%(', '.join(['"%s"'%tag for tag in machine_taglists[i]]))
tempstring += " {%s},\n"%(', '.join([' | '.join(['(1 << LV_%s)'%fc for fc in fcount]) for fcount in machine_fcountlists[i]]))
tempstring += " {%s},\n"%(', '.join(['%s_%s'%(functions[i], tag) for tag in machine_taglists[i]]))
diff --git a/volk/gen/make_machines_h.py b/volk/gen/make_machines_h.py
index 09ada3e0d..563de18a6 100644
--- a/volk/gen/make_machines_h.py
+++ b/volk/gen/make_machines_h.py
@@ -35,6 +35,7 @@ struct volk_machine {
const char *name;
"""
for function in functions:
+ tempstring += " const char *%s_name;\n"%function
tempstring += " const char *%s_indices[%d];\n"%(function, len(archs))
tempstring += " const int %s_arch_defs[%d];\n"%(function, len(archs))
tempstring += " const %s %s_archs[%d];\n"%(replace_volk.sub("p", function), function, len(archs))
diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h
new file mode 100644
index 000000000..77bcb709b
--- /dev/null
+++ b/volk/include/volk/volk_prefs.h
@@ -0,0 +1,19 @@
+#ifndef INCLUDED_VOLK_PREFS_H
+#define INCLUDED_VOLK_PREFS_H
+
+// These functions are implemented in C (lib/volk_prefs.c); give them C linkage
+// so C++ callers do not have to wrap this header themselves.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// one entry of the profiling config: which implementation to use for
+// a given function
+////////////////////////////////////////////////////////////////////////
+struct volk_arch_pref {
+    char name[128]; //function name, as written to the config file
+    char arch[32];  //preferred architecture for that function
+};
+
+////////////////////////////////////////////////////////////////////////
+// get path to volk_config profiling info
+// writes "$HOME/.volk/volk_config" into the supplied buffer; the buffer
+// size is not passed in, so the caller must make sure it is big enough
+////////////////////////////////////////////////////////////////////////
+void get_config_path(char *);
+
+////////////////////////////////////////////////////////////////////////
+// load prefs from the config file into a malloc'd array
+// the array is stored through the pointer argument; the return value is
+// the number of entries in it
+////////////////////////////////////////////////////////////////////////
+int load_preferences(struct volk_arch_pref **);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //INCLUDED_VOLK_PREFS_H
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
index b0d60781f..e8fe5cc58 100644
--- a/volk/lib/CMakeLists.txt
+++ b/volk/lib/CMakeLists.txt
@@ -200,6 +200,7 @@ INCLUDE_DIRECTORIES(
)
LIST(APPEND volk_sources
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c
${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c
${volk_gen_sources}
)
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index fa091ad0d..3eb1da1f1 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -8,6 +8,7 @@
#include <list>
#include <ctime>
#include <cmath>
+#include <limits>
#include <boost/lexical_cast.hpp>
#include <volk/volk.h>
#include <volk/volk_cpu.h>
@@ -240,7 +241,15 @@ public:
private: std::list<std::vector<char> > _mems;
};
-bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
+bool run_volk_tests(struct volk_func_desc desc,
+ void (*manual_func)(),
+ std::string name,
+ float tol,
+ float scalar,
+ int vlen,
+ int iter,
+ std::vector<std::string> *best_arch_vector = 0
+ ) {
std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
//first let's get a list of available architectures for the test
@@ -297,6 +306,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
//now run the test
clock_t start, end;
+ std::vector<double> profile_times;
for(int i = 0; i < arch_list.size(); i++) {
start = clock();
@@ -331,8 +341,12 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
}
end = clock();
- std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
+ double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl;
+
+ profile_times.push_back(arch_time);
}
+
//and now compare each output to the generic output
//first we have to know which output is the generic one, they aren't in order...
int generic_offset=0;
@@ -344,7 +358,9 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
bool fail = false;
bool fail_global = false;
+ std::vector<bool> arch_results;
for(int i=0; i<arch_list.size(); i++) {
+ fail = false;
if(i != generic_offset) {
for(int j=0; j<both_sigs.size(); j++) {
if(both_sigs[j].is_float) {
@@ -395,6 +411,21 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri
//fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
}
}
+ arch_results.push_back(!fail);
+ }
+
+ double best_time = std::numeric_limits<double>::max();
+ std::string best_arch = "generic";
+ for(int i=0; i < arch_list.size(); i++) {
+ if((profile_times[i] < best_time) && arch_results[i]) {
+ best_time = profile_times[i];
+ best_arch = arch_list[i];
+ }
+ }
+
+ std::cout << "Best arch: " << best_arch << std::endl;
+ if(best_arch_vector) {
+ best_arch_vector->push_back(name + std::string(" ") + best_arch);
}
return fail_global;
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 304a00533..a1bc1f20c 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -3,6 +3,7 @@
#include <cstdlib>
#include <string>
+#include <vector>
#include <volk/volk.h>
#include <volk/volk_common.h>
@@ -20,10 +21,10 @@ volk_type_t volk_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
-bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int);
-
-#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); }
+bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *);
+#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); }
+#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results)
typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
new file mode 100644
index 000000000..ebfe3bc40
--- /dev/null
+++ b/volk/lib/volk_prefs.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <volk/volk_prefs.h>
+
+//#if defined(_WIN32)
+//#include <Windows.h>
+//#endif
+
+// Writes the location of the profiling config file,
+// "$HOME/.volk/volk_config", into path.
+//
+// path: destination buffer. Its size is not passed in, so the caller must
+//       guarantee room for $HOME plus the 18-character suffix and the
+//       terminating NUL.
+//
+// If HOME is not set, path is set to the empty string instead of handing a
+// NULL pointer to strcpy(); opening an empty path then fails cleanly.
+void get_config_path(char *path) {
+    const char *suffix = "/.volk/volk_config";
+    const char *home;
+
+    if(!path) return;
+
+    home = getenv("HOME");
+    if(!home) {
+        path[0] = '\0';
+        return;
+    }
+    strcpy(path, home);
+    strcat(path, suffix);
+}
+
+// Loads the per-function architecture preferences from the config file.
+//
+// prefs: out-parameter. On return *prefs points to a malloc'd array holding
+//        one entry per "volk_<function> <arch>" line of the config file, or
+//        is NULL when nothing was loaded. The caller owns the array.
+//
+// Returns the number of entries in *prefs; 0 if the config file is missing,
+// contains no preference lines, or memory could not be allocated.
+int load_preferences(struct volk_arch_pref **prefs) {
+    FILE *config_file;
+    char path[512], line[512], function[128], arch[32];
+    int n_arch_prefs = 0;
+    int n_loaded = 0;
+    const char *home;
+
+    //make sure the caller never sees an uninitialised pointer
+    (*prefs) = NULL;
+
+    //get_config_path() copies $HOME plus "/.volk/volk_config" without a
+    //bounds check, so verify HOME exists and fits before calling it
+    home = getenv("HOME");
+    if(!home || strlen(home) + sizeof("/.volk/volk_config") > sizeof(path)) return 0;
+
+    //get the config path
+    get_config_path(path);
+    config_file = fopen(path, "r");
+    if(!config_file) return 0; //no prefs found
+
+    //first pass: count the preference lines
+    //the field widths keep sscanf inside function[128] and arch[32]
+    while(fgets(line, sizeof(line), config_file) != NULL) {
+        if(sscanf(line, "%127s %31s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+            n_arch_prefs++;
+        }
+    }
+
+    if(n_arch_prefs == 0) {
+        fclose(config_file);
+        return 0;
+    }
+
+    //now allocate the memory required for volk_arch_prefs
+    (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref));
+    if(!(*prefs)) {
+        fclose(config_file);
+        return 0;
+    }
+
+    //reset the file pointer and write the prefs into volk_arch_prefs
+    //the n_loaded bound protects the array if the file grew since the first pass
+    rewind(config_file);
+    while(n_loaded < n_arch_prefs && fgets(line, sizeof(line), config_file) != NULL) {
+        if(sscanf(line, "%127s %31s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
+            struct volk_arch_pref *t_pref = &(*prefs)[n_loaded];
+            strncpy(t_pref->name, function, sizeof(t_pref->name) - 1);
+            t_pref->name[sizeof(t_pref->name) - 1] = '\0';
+            strncpy(t_pref->arch, arch, sizeof(t_pref->arch) - 1);
+            t_pref->arch[sizeof(t_pref->arch) - 1] = '\0';
+            n_loaded++;
+        }
+    }
+    fclose(config_file);
+    return n_loaded;
+}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index 25ad75cda..e10433fd0 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -1,10 +1,40 @@
-#include<volk_rank_archs.h>
-#include<stdio.h>
+#include <volk_rank_archs.h>
+#include <volk/volk_prefs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
-unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) {
- int i = 1;
+// Looks up arch_name in the table of implementation names for one function.
+//
+// indices:   array of n_archs implementation names
+// n_archs:   number of entries in indices
+// arch_name: name to look for (only the first 20 characters are compared)
+//
+// Returns the position of arch_name in indices. If it is not present, a
+// warning is printed and the position of "generic" is returned instead; if
+// "generic" is missing as well, 0 is returned.
+unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) {
+    unsigned int i;
+    for(i=0; i<n_archs; i++) {
+        if(!strncmp(indices[i], arch_name, 20)) {
+            return i;
+        }
+    }
+    //the fallback lookup itself failed: stop here instead of recursing forever
+    if(!strncmp(arch_name, "generic", 20)) {
+        printf("Volk warning: no generic impl found, returning first impl\n");
+        return 0;
+    }
+    //something terrible should happen here
+    printf("Volk warning: no arch found, returning generic impl\n");
+    return get_index(indices, n_archs, "generic"); //but we'll fake it for now
+}
+
+unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) {
+ int i;
unsigned int best_val = 0;
- for(; i < n_archs; ++i) {
+ static struct volk_arch_pref *volk_arch_prefs;
+ static int n_arch_prefs = 0;
+ static int prefs_loaded = 0;
+ if(!prefs_loaded) {
+ n_arch_prefs = load_preferences(&volk_arch_prefs);
+ prefs_loaded = 1;
+ }
+
+ //now look for the function name in the prefs list
+ for(i=0; i < n_arch_prefs; i++) {
+ if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it
+ return get_index(indices, n_archs, volk_arch_prefs[i].arch);
+ }
+ }
+
+ for(i=1; i < n_archs; ++i) {
if((arch_defs[i]&(!arch)) == 0) {
best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val;
}
diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h
index 8fa0631ee..546240d2c 100644
--- a/volk/lib/volk_rank_archs.h
+++ b/volk/lib/volk_rank_archs.h
@@ -5,8 +5,8 @@
extern "C" {
#endif
-unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch);
-
+unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name);
+unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch);
#ifdef __cplusplus
}
diff --git a/volk/volk.pc.in b/volk/volk.pc.in
index 85425ba64..58e976786 100644
--- a/volk/volk.pc.in
+++ b/volk/volk.pc.in
@@ -5,11 +5,10 @@ includedir=@includedir@
LV_CXXFLAGS=@LV_CXXFLAGS@
-
Name: volk
-Description: VOLK.. Vector Optimized Library of Kernels
+Description: VOLK: Vector Optimized Library of Kernels
Requires:
Version: @VERSION@
-Libs: -lvolk -lvolk_runtime @LV_ORC_PKGCONFIG@
+Libs: -lvolk
Cflags: -I${includedir} ${LV_CXXFLAGS}