summaryrefslogtreecommitdiff
path: root/volk/lib
diff options
context:
space:
mode:
authorJosh Blum2013-01-13 13:51:46 -0800
committerJosh Blum2013-01-13 15:23:48 -0800
commite826097e09fdfb04d14bf87861646b88229db881 (patch)
tree38d4f3e09cfbcedcbfc4ed6a18c0aafa4121dd9c /volk/lib
parent3abf4a11534d0db96e32830f629c8e34a8098798 (diff)
downloadgnuradio-e826097e09fdfb04d14bf87861646b88229db881.tar.gz
gnuradio-e826097e09fdfb04d14bf87861646b88229db881.tar.bz2
gnuradio-e826097e09fdfb04d14bf87861646b88229db881.zip
gras: support changeset for 3.6.4
used volk from next branch cf5c930d89ac89ba5a0da4a616c88d3c37e018ae for grextras support (it uses the dispatcher) empty stubs for the gr_basic_block msg passing. This is going to be difficult to figure out. The alias stuff may or may not be related most qa pass, there seems to be some additional issues, will be working through them on futher commits Conflicts: gnuradio-core/CMakeLists.txt gnuradio-core/src/lib/runtime/CMakeLists.txt gnuradio-core/src/lib/runtime/gr_block.cc gnuradio-core/src/lib/runtime/gr_block.h gnuradio-core/src/lib/runtime/gr_hier_block2.h gnuradio-core/src/lib/runtime/gr_top_block.h gnuradio-core/src/python/gnuradio/gr/__init__.py gr-audio/examples/c++/CMakeLists.txt gr-fcd/examples/c++/CMakeLists.txt grc/python/Port.py
Diffstat (limited to 'volk/lib')
-rw-r--r--volk/lib/CMakeLists.txt3
-rw-r--r--volk/lib/qa_utils.cc36
-rw-r--r--volk/lib/qa_utils.h2
-rw-r--r--volk/lib/testqa.cc190
-rw-r--r--volk/lib/volk_prefs.c39
-rw-r--r--volk/lib/volk_rank_archs.c111
-rw-r--r--volk/lib/volk_rank_archs.h40
7 files changed, 256 insertions, 165 deletions
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
index 79655f1bd..68fadc35b 100644
--- a/volk/lib/CMakeLists.txt
+++ b/volk/lib/CMakeLists.txt
@@ -202,7 +202,7 @@ message(STATUS "Available machines: ${available_machines}")
#dependencies are all python, xml, and header implementation files
file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
-file(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h)
+file(GLOB h_files ${CMAKE_SOURCE_DIR}/kernels/volk/*.h)
macro(gen_template tmpl output)
list(APPEND volk_gen_sources ${output})
@@ -253,6 +253,7 @@ endforeach(machine_name)
include_directories(
${CMAKE_BINARY_DIR}/include
${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/kernels
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
)
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 4e361aece..e526eb2d0 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -63,12 +63,12 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
}
}
-static std::vector<std::string> get_arch_list(struct volk_func_desc desc) {
+static std::vector<std::string> get_arch_list(volk_func_desc_t desc) {
std::vector<std::string> archlist;
- for(int i = 0; i < desc.n_archs; i++) {
+ for(size_t i = 0; i < desc.n_impls; i++) {
//if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc
- archlist.push_back(std::string(desc.indices[i]));
+ archlist.push_back(std::string(desc.impl_names[i]));
}
return archlist;
@@ -256,7 +256,7 @@ public:
private: std::list<std::vector<char> > _mems;
};
-bool run_volk_tests(struct volk_func_desc desc,
+bool run_volk_tests(volk_func_desc_t desc,
void (*manual_func)(),
std::string name,
float tol,
@@ -442,22 +442,32 @@ bool run_volk_tests(struct volk_func_desc desc,
arch_results.push_back(!fail);
}
- double best_time = std::numeric_limits<double>::max();
- std::string best_arch = "generic";
- for(size_t i=0; i < arch_list.size(); i++) {
- if((profile_times[i] < best_time) && arch_results[i]) {
- best_time = profile_times[i];
- best_arch = arch_list[i];
+ double best_time_a = std::numeric_limits<double>::max();
+ double best_time_u = std::numeric_limits<double>::max();
+ std::string best_arch_a = "generic";
+ std::string best_arch_u = "generic";
+ for(size_t i=0; i < arch_list.size(); i++)
+ {
+ if((profile_times[i] < best_time_u) && arch_results[i] && desc.impl_alignment[i] == 0)
+ {
+ best_time_u = profile_times[i];
+ best_arch_u = arch_list[i];
+ }
+ if((profile_times[i] < best_time_a) && arch_results[i])
+ {
+ best_time_a = profile_times[i];
+ best_arch_a = arch_list[i];
}
}
- std::cout << "Best arch: " << best_arch << std::endl;
+ std::cout << "Best aligned arch: " << best_arch_a << std::endl;
+ std::cout << "Best unaligned arch: " << best_arch_u << std::endl;
if(best_arch_vector) {
if(puppet_master_name == "NULL") {
- best_arch_vector->push_back(name + std::string(" ") + best_arch);
+ best_arch_vector->push_back(name + " " + best_arch_a + " " + best_arch_u);
}
else {
- best_arch_vector->push_back(puppet_master_name + std::string(" ") + best_arch);
+ best_arch_vector->push_back(puppet_master_name + " " + best_arch_a + " " + best_arch_u);
}
}
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 1e639ac3c..0f17cdaa3 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -21,7 +21,7 @@ volk_type_t volk_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
-bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *, std::string);
+bool run_volk_tests(volk_func_desc_t, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *, std::string);
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0, "NULL"), 0); }
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 2e41c25da..f133897cb 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -2,107 +2,89 @@
#include <volk/volk.h>
#include <boost/test/unit_test.hpp>
-//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000);
-//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 20460, 1);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 20460, 1);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 20460, 1);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 20460, 1);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 20460, 1);
-VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 1);
-//VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 20460, 10000);
-//VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 20460, 10000);
-//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 1000);
-//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 1000);
-VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_16u_byteswap_u, 0, 0, 20460, 1);
-//VOLK_RUN_TESTS(volk_16i_32fc_dot_prod_32fc_a, 1e-4, 0, 204600, 1);
-VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_add_32f_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 1);
-//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 2046000, 1);
-VOLK_RUN_TESTS(volk_32fc_32f_dot_prod_32fc_a, 1e-4, 0, 204600, 1);
-VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 3, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 20460, 1);
-//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 2046, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 204600, 1);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 1);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_16i_a, 1e-4, 0, 204600, 1);
-//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 3, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32767, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 20460, 1);
-//VOLK_RUN_TESTS(volk_32u_popcnt_a, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 20460, 1);
-//VOLK_RUN_TESTS(volk_64u_popcnt_a, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 20460, 1);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc_a, 1e-2, (lv_32fc_t)lv_cmake(0.953939201, 0.3), 20460, 1);
+//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000);
+//VOLK_RUN_TESTS(volk_16i_branch_4_state_8, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f, 1e-5, 32768.0, 20460, 1);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2, 1e-4, 32768.0, 20460, 1);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_16ic_magnitude_16i, 1, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f, 1e-5, 32768.0, 20460, 1);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f, 1e-4, 32768.0, 20460, 1);
+VOLK_RUN_TESTS(volk_16i_convert_8i, 0, 0, 20460, 1);
+//VOLK_RUN_TESTS(volk_16i_max_star_16i, 0, 0, 20460, 10000);
+//VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i, 0, 0, 20460, 10000);
+//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add, 1e-4, 0, 2046, 1000);
+//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 1000);
+VOLK_RUN_TESTS(volk_16u_byteswap, 0, 0, 20460, 1);
+//VOLK_RUN_TESTS(volk_16i_32fc_dot_prod_32fc, 1e-4, 0, 204600, 1);
+VOLK_RUN_TESTS(volk_32f_accumulator_s32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_add_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f, 1e-4, 20.0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f, 1e-4, 10.0, 20460, 1);
+//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i, 0, 32768, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_imag_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc, 1e-4, 0, 2046000, 1);
+VOLK_RUN_TESTS(volk_32fc_32f_dot_prod_32fc, 1e-4, 0, 204600, 1);
+VOLK_RUN_TESTS(volk_32fc_index_max_16u, 3, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i, 1, 32768, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_magnitude_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i, 1, 32768, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i, 1, 2<<31, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_convert_64f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i, 1, 128, 20460, 1);
+//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f, 1e-4, 0, 2046, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, 1e-4, 10, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_divide_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f, 1e-4, 0, 204600, 1);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_16i, 1e-4, 0, 204600, 1);
+//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000);
+VOLK_RUN_TESTS(volk_32f_index_max_16u, 3, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic, 1, 32767, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_max_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_min_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_normalize, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_power_32f, 1e-4, 4, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_sqrt_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_subtract_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32i_x2_and_32i, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_32i_x2_or_32i, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32u_byteswap, 0, 0, 20460, 1);
+//VOLK_RUN_TESTS(volk_32u_popcnt, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_64f_convert_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_64f_x2_max_64f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_64f_x2_min_64f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_64u_byteswap, 0, 0, 20460, 1);
+//VOLK_RUN_TESTS(volk_64u_popcnt, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i, 0, 256, 20460, 1);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_8i_convert_16i, 0, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc, 1e-2, (lv_32fc_t)lv_cmake(0.953939201, 0.3), 20460, 1);
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
index 5e5c9dfff..f787b5e2a 100644
--- a/volk/lib/volk_prefs.c
+++ b/volk/lib/volk_prefs.c
@@ -7,7 +7,8 @@
//#include <Windows.h>
//#endif
-void get_config_path(char *path) {
+void volk_get_config_path(char *path)
+{
const char *suffix = "/.volk/volk_config";
char *home = NULL;
if (home == NULL) home = getenv("HOME");
@@ -20,38 +21,30 @@ void get_config_path(char *path) {
strcat(path, suffix);
}
-//passing by reference in C can (***********)
-int load_preferences(struct volk_arch_pref **prefs) {
+size_t volk_load_preferences(volk_arch_pref_t **prefs_res)
+{
FILE *config_file;
- char path[512], line[512], function[128], arch[32];
- int n_arch_prefs = 0;
- struct volk_arch_pref *t_pref;
+ char path[512], line[512];
+ size_t n_arch_prefs = 0;
+ volk_arch_pref_t *prefs = NULL;
//get the config path
- get_config_path(path);
+ volk_get_config_path(path);
if (path == NULL) return n_arch_prefs; //no prefs found
config_file = fopen(path, "r");
if(!config_file) return n_arch_prefs; //no prefs found
- while(fgets(line, 512, config_file) != NULL) {
- if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
- n_arch_prefs++;
- }
- }
-
- //now allocate the memory required for volk_arch_prefs
- (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref));
- t_pref = (*prefs);
-
//reset the file pointer and write the prefs into volk_arch_prefs
- rewind(config_file);
- while(fgets(line, 512, config_file) != NULL) {
- if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) {
- strncpy(t_pref->name, function, 128);
- strncpy(t_pref->arch, arch, 32);
- t_pref++;
+ while(fgets(line, sizeof(line), config_file) != NULL)
+ {
+ prefs = (volk_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
+ volk_arch_pref_t *p = prefs + n_arch_prefs;
+ if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_", 5))
+ {
+ n_arch_prefs++;
}
}
fclose(config_file);
+ *prefs_res = prefs;
return n_arch_prefs;
}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index 865d60955..6ab013f26 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -1,43 +1,112 @@
+/*
+ * Copyright 2011-2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
#include <volk_rank_archs.h>
#include <volk/volk_prefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) {
+#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
+ #define __popcnt __builtin_popcount
+#else
+ inline unsigned __popcnt(unsigned num)
+ {
+ unsigned pop = 0;
+ while(num)
+ {
+ if (num & 0x1) pop++;
+ num >>= 1;
+ }
+ return pop;
+ }
+#endif
+
+int volk_get_index(
+ const char *impl_names[], //list of implementations by name
+ const size_t n_impls, //number of implementations available
+ const char *impl_name //the implementation name to find
+){
unsigned int i;
- for(i=0; i<n_archs; i++) {
- if(!strncmp(indices[i], arch_name, 20)) {
+ for (i = 0; i < n_impls; i++) {
+ if(!strncmp(impl_names[i], impl_name, 20)) {
return i;
}
}
+ //TODO return -1;
//something terrible should happen here
printf("Volk warning: no arch found, returning generic impl\n");
- return get_index(indices, n_archs, "generic"); //but we'll fake it for now
+ return volk_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now
}
-unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) {
- unsigned int i;
- unsigned int best_val = 0;
- static struct volk_arch_pref *volk_arch_prefs;
- static unsigned int n_arch_prefs = 0;
+int volk_rank_archs(
+ const char *kern_name, //name of the kernel to rank
+ const char *impl_names[], //list of implementations by name
+ const int* impl_deps, //requirement mask per implementation
+ const bool* alignment, //alignment status of each implementation
+ size_t n_impls, //number of implementations available
+ const bool align //if false, filter aligned implementations
+){
+ size_t i;
+ static volk_arch_pref_t *volk_arch_prefs;
+ static size_t n_arch_prefs = 0;
static int prefs_loaded = 0;
if(!prefs_loaded) {
- n_arch_prefs = load_preferences(&volk_arch_prefs);
+ n_arch_prefs = volk_load_preferences(&volk_arch_prefs);
prefs_loaded = 1;
}
- //now look for the function name in the prefs list
- for(i=0; i < n_arch_prefs; i++) {
- if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it
- return get_index(indices, n_archs, volk_arch_prefs[i].arch);
- }
- }
+ //now look for the function name in the prefs list
+ for(i = 0; i < n_arch_prefs; i++)
+ {
+ if(!strncmp(kern_name, volk_arch_prefs[i].name, sizeof(volk_arch_prefs[i].name))) //found it
+ {
+ const char *impl_name = align? volk_arch_prefs[i].impl_a : volk_arch_prefs[i].impl_u;
+ return volk_get_index(impl_names, n_impls, impl_name);
+ }
+ }
- for(i=1; i < n_archs; ++i) {
- if((arch_defs[i]&(!arch)) == 0) {
- best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val;
+ //return the best index with the largest deps
+ size_t best_index_a = 0;
+ size_t best_index_u = 0;
+ int best_value_a = -1;
+ int best_value_u = -1;
+ for(i = 0; i < n_impls; i++)
+ {
+ const signed val = __popcnt(impl_deps[i]);
+ if (alignment[i] && val > best_value_a)
+ {
+ best_index_a = i;
+ best_value_a = val;
+ }
+ if (!alignment[i] && val > best_value_u)
+ {
+ best_index_u = i;
+ best_value_u = val;
+ }
}
- }
- return best_val;
+
+ //when align and we found a best aligned, use it
+ if (align && best_value_a != -1) return best_index_a;
+
+ //otherwise return the best unaligned
+ return best_index_u;
}
diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h
index 546240d2c..b3bf8ff17 100644
--- a/volk/lib/volk_rank_archs.h
+++ b/volk/lib/volk_rank_archs.h
@@ -1,12 +1,48 @@
+/*
+ * Copyright 2011-2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
#ifndef INCLUDED_VOLK_RANK_ARCHS_H
#define INCLUDED_VOLK_RANK_ARCHS_H
+#include <stdlib.h>
+#include <stdbool.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name);
-unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch);
+int volk_get_index(
+ const char *impl_names[], //list of implementations by name
+ const size_t n_impls, //number of implementations available
+ const char *impl_name //the implementation name to find
+);
+
+int volk_rank_archs(
+ const char *kern_name, //name of the kernel to rank
+ const char *impl_names[], //list of implementations by name
+ const int* impl_deps, //requirement mask per implementation
+ const bool* alignment, //alignment status of each implementation
+ size_t n_impls, //number of implementations available
+ const bool align //if false, filter aligned implementations
+);
#ifdef __cplusplus
}