diff options
Diffstat (limited to 'volk/lib')
-rw-r--r-- | volk/lib/CMakeLists.txt | 3 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 36 | ||||
-rw-r--r-- | volk/lib/qa_utils.h | 2 | ||||
-rw-r--r-- | volk/lib/testqa.cc | 190 | ||||
-rw-r--r-- | volk/lib/volk_prefs.c | 39 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 111 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.h | 40 |
7 files changed, 256 insertions, 165 deletions
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index 79655f1bd..68fadc35b 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -202,7 +202,7 @@ message(STATUS "Available machines: ${available_machines}") #dependencies are all python, xml, and header implementation files file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) -file(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h) +file(GLOB h_files ${CMAKE_SOURCE_DIR}/kernels/volk/*.h) macro(gen_template tmpl output) list(APPEND volk_gen_sources ${output}) @@ -253,6 +253,7 @@ endforeach(machine_name) include_directories( ${CMAKE_BINARY_DIR}/include ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 4e361aece..e526eb2d0 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -63,12 +63,12 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } } -static std::vector<std::string> get_arch_list(struct volk_func_desc desc) { +static std::vector<std::string> get_arch_list(volk_func_desc_t desc) { std::vector<std::string> archlist; - for(int i = 0; i < desc.n_archs; i++) { + for(size_t i = 0; i < desc.n_impls; i++) { //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc - archlist.push_back(std::string(desc.indices[i])); + archlist.push_back(std::string(desc.impl_names[i])); } return archlist; @@ -256,7 +256,7 @@ public: private: std::list<std::vector<char> > _mems; }; -bool run_volk_tests(struct volk_func_desc desc, +bool run_volk_tests(volk_func_desc_t desc, void (*manual_func)(), std::string name, float tol, @@ -442,22 +442,32 @@ bool run_volk_tests(struct volk_func_desc desc, arch_results.push_back(!fail); } - double best_time = std::numeric_limits<double>::max(); - std::string best_arch = "generic"; - for(size_t i=0; i < arch_list.size(); i++) { - if((profile_times[i] < best_time) && arch_results[i]) { - best_time = profile_times[i]; - best_arch = arch_list[i]; + double best_time_a = std::numeric_limits<double>::max(); + double best_time_u = std::numeric_limits<double>::max(); + std::string best_arch_a = "generic"; + std::string best_arch_u = "generic"; + for(size_t i=0; i < arch_list.size(); i++) + { + if((profile_times[i] < best_time_u) && arch_results[i] && desc.impl_alignment[i] == 0) + { + best_time_u = profile_times[i]; + best_arch_u = arch_list[i]; + } + if((profile_times[i] < best_time_a) && arch_results[i]) + { + best_time_a = profile_times[i]; + best_arch_a = arch_list[i]; } } - std::cout << "Best arch: " << best_arch << std::endl; + std::cout << "Best aligned arch: " << best_arch_a << std::endl; + std::cout << "Best unaligned arch: " << best_arch_u << std::endl; if(best_arch_vector) { if(puppet_master_name == "NULL") { - best_arch_vector->push_back(name + std::string(" ") + best_arch); + best_arch_vector->push_back(name + " " + best_arch_a + " " + best_arch_u); } else { - best_arch_vector->push_back(puppet_master_name + std::string(" ") + best_arch); + best_arch_vector->push_back(puppet_master_name + " " + best_arch_a + " " + best_arch_u); } } diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 1e639ac3c..0f17cdaa3 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -21,7 +21,7 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *, std::string); +bool run_volk_tests(volk_func_desc_t, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *, std::string); #define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0, "NULL"), 0); } diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 2e41c25da..f133897cb 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -2,107 +2,89 @@ #include <volk/volk.h> #include <boost/test/unit_test.hpp> -//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000); -//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000); -VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 20460, 1); -VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 20460, 1); -VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 20460, 1); -VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 20460, 1); -VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 20460, 1); -VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 20460, 1); -VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 1); -//VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 20460, 10000); -//VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 20460, 10000); -//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 1000); -//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 1000); -VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_16u_byteswap_u, 0, 0, 20460, 1); -//VOLK_RUN_TESTS(volk_16i_32fc_dot_prod_32fc_a, 1e-4, 0, 204600, 1); -VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_add_32f_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 1); -//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000); -VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 2046000, 1); -VOLK_RUN_TESTS(volk_32fc_32f_dot_prod_32fc_a, 1e-4, 0, 204600, 1); -VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 3, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 20460, 1); -VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 20460, 1); -//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000); -VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 2046, 1); -VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 204600, 1); -VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 1); -VOLK_RUN_TESTS(volk_32f_x2_dot_prod_16i_a, 1e-4, 0, 204600, 1); -//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000); -VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 3, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32767, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 1); -VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 20460, 1); -//VOLK_RUN_TESTS(volk_32u_popcnt_a, 0, 0, 2046, 10000); -VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 20460, 1); -//VOLK_RUN_TESTS(volk_64u_popcnt_a, 0, 0, 2046, 10000); -VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 20460, 1); -VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 1); -VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 20460, 1); -VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc_a, 1e-2, (lv_32fc_t)lv_cmake(0.953939201, 0.3), 20460, 1); +//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_branch_4_state_8, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f, 1e-5, 32768.0, 20460, 1); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2, 1e-4, 32768.0, 20460, 1); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_16ic_magnitude_16i, 1, 0, 20460, 1); +VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f, 1e-5, 32768.0, 20460, 1); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f, 1e-4, 32768.0, 20460, 1); +VOLK_RUN_TESTS(volk_16i_convert_8i, 0, 0, 20460, 1); +//VOLK_RUN_TESTS(volk_16i_max_star_16i, 0, 0, 20460, 10000); +//VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i, 0, 0, 20460, 10000); +//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add, 1e-4, 0, 2046, 1000); +//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 1000); +VOLK_RUN_TESTS(volk_16u_byteswap, 0, 0, 20460, 1); +//VOLK_RUN_TESTS(volk_16i_32fc_dot_prod_32fc, 1e-4, 0, 204600, 1); +VOLK_RUN_TESTS(volk_32f_accumulator_s32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_add_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f, 1e-4, 20.0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f, 1e-4, 10.0, 20460, 1); +//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i, 0, 32768, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_deinterleave_imag_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc, 1e-4, 0, 2046000, 1); +VOLK_RUN_TESTS(volk_32fc_32f_dot_prod_32fc, 1e-4, 0, 204600, 1); +VOLK_RUN_TESTS(volk_32fc_index_max_16u, 3, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i, 1, 32768, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_magnitude_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i, 1, 32768, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i, 1, 2<<31, 20460, 1); +VOLK_RUN_TESTS(volk_32f_convert_64f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i, 1, 128, 20460, 1); +//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f, 1e-4, 0, 2046, 1); +VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, 1e-4, 10, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_divide_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f, 1e-4, 0, 204600, 1); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_16i, 1e-4, 0, 204600, 1); +//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32f_index_max_16u, 3, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic, 1, 32767, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_max_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_min_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_normalize, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_power_32f, 1e-4, 4, 20460, 1); +VOLK_RUN_TESTS(volk_32f_sqrt_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_subtract_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32i_x2_and_32i, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_32i_x2_or_32i, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32u_byteswap, 0, 0, 20460, 1); +//VOLK_RUN_TESTS(volk_32u_popcnt, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_64f_x2_max_64f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_64f_x2_min_64f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_64u_byteswap, 0, 0, 20460, 1); +//VOLK_RUN_TESTS(volk_64u_popcnt, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i, 0, 256, 20460, 1); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_8i_convert_16i, 0, 0, 20460, 1); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f, 1e-4, 100, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_conjugate_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f, 1e-4, 0, 20460, 1); +VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc, 1e-2, (lv_32fc_t)lv_cmake(0.953939201, 0.3), 20460, 1); diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c index 5e5c9dfff..f787b5e2a 100644 --- a/volk/lib/volk_prefs.c +++ b/volk/lib/volk_prefs.c @@ -7,7 +7,8 @@ //#include <Windows.h> //#endif -void get_config_path(char *path) { +void volk_get_config_path(char *path) +{ const char *suffix = "/.volk/volk_config"; char *home = NULL; if (home == NULL) home = getenv("HOME"); @@ -20,38 +21,30 @@ void get_config_path(char *path) { strcat(path, suffix); } -//passing by reference in C can (***********) -int load_preferences(struct volk_arch_pref **prefs) { +size_t volk_load_preferences(volk_arch_pref_t **prefs_res) +{ FILE *config_file; - char path[512], line[512], function[128], arch[32]; - int n_arch_prefs = 0; - struct volk_arch_pref *t_pref; + char path[512], line[512]; + size_t n_arch_prefs = 0; + volk_arch_pref_t *prefs = NULL; //get the config path - get_config_path(path); + volk_get_config_path(path); if (path == NULL) return n_arch_prefs; //no prefs found config_file = fopen(path, "r"); if(!config_file) return n_arch_prefs; //no prefs found - while(fgets(line, 512, config_file) != NULL) { - if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { - n_arch_prefs++; - } - } - - //now allocate the memory required for volk_arch_prefs - (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref)); - t_pref = (*prefs); - //reset the file pointer and write the prefs into volk_arch_prefs - rewind(config_file); - while(fgets(line, 512, config_file) != NULL) { - if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { - strncpy(t_pref->name, function, 128); - strncpy(t_pref->arch, arch, 32); - t_pref++; + while(fgets(line, sizeof(line), config_file) != NULL) + { + prefs = (volk_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs)); + volk_arch_pref_t *p = prefs + n_arch_prefs; + if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_", 5)) + { + n_arch_prefs++; } } fclose(config_file); + *prefs_res = prefs; return n_arch_prefs; } diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 865d60955..6ab013f26 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,43 +1,112 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + #include <volk_rank_archs.h> #include <volk/volk_prefs.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { +#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4 + #define __popcnt __builtin_popcount +#else + inline unsigned __popcnt(unsigned num) + { + unsigned pop = 0; + while(num) + { + if (num & 0x1) pop++; + num >>= 1; + } + return pop; + } +#endif + +int volk_get_index( + const char *impl_names[], //list of implementations by name + const size_t n_impls, //number of implementations available + const char *impl_name //the implementation name to find +){ unsigned int i; - for(i=0; i<n_archs; i++) { - if(!strncmp(indices[i], arch_name, 20)) { + for (i = 0; i < n_impls; i++) { + if(!strncmp(impl_names[i], impl_name, 20)) { return i; } } + //TODO return -1; //something terrible should happen here printf("Volk warning: no arch found, returning generic impl\n"); - return get_index(indices, n_archs, "generic"); //but we'll fake it for now + return volk_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now } -unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { - unsigned int i; - unsigned int best_val = 0; - static struct volk_arch_pref *volk_arch_prefs; - static unsigned int n_arch_prefs = 0; +int volk_rank_archs( + const char *kern_name, //name of the kernel to rank + const char *impl_names[], //list of implementations by name + const int* impl_deps, //requirement mask per implementation + const bool* alignment, //alignment status of each implementation + size_t n_impls, //number of implementations available + const bool align //if false, filter aligned implementations +){ + size_t i; + static volk_arch_pref_t *volk_arch_prefs; + static size_t n_arch_prefs = 0; static int prefs_loaded = 0; if(!prefs_loaded) { - n_arch_prefs = load_preferences(&volk_arch_prefs); + n_arch_prefs = volk_load_preferences(&volk_arch_prefs); prefs_loaded = 1; } - //now look for the function name in the prefs list - for(i=0; i < n_arch_prefs; i++) { - if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it - return get_index(indices, n_archs, volk_arch_prefs[i].arch); - } - } + //now look for the function name in the prefs list + for(i = 0; i < n_arch_prefs; i++) + { + if(!strncmp(kern_name, volk_arch_prefs[i].name, sizeof(volk_arch_prefs[i].name))) //found it + { + const char *impl_name = align? volk_arch_prefs[i].impl_a : volk_arch_prefs[i].impl_u; + return volk_get_index(impl_names, n_impls, impl_name); + } + } - for(i=1; i < n_archs; ++i) { - if((arch_defs[i]&(!arch)) == 0) { - best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; + //return the best index with the largest deps + size_t best_index_a = 0; + size_t best_index_u = 0; + int best_value_a = -1; + int best_value_u = -1; + for(i = 0; i < n_impls; i++) + { + const signed val = __popcnt(impl_deps[i]); + if (alignment[i] && val > best_value_a) + { + best_index_a = i; + best_value_a = val; + } + if (!alignment[i] && val > best_value_u) + { + best_index_u = i; + best_value_u = val; + } } - } - return best_val; + + //when align and we found a best aligned, use it + if (align && best_value_a != -1) return best_index_a; + + //otherwise return the best unaligned + return best_index_u; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 546240d2c..b3bf8ff17 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -1,12 +1,48 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + #ifndef INCLUDED_VOLK_RANK_ARCHS_H #define INCLUDED_VOLK_RANK_ARCHS_H +#include <stdlib.h> +#include <stdbool.h> + #ifdef __cplusplus extern "C" { #endif -unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); -unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); +int volk_get_index( + const char *impl_names[], //list of implementations by name + const size_t n_impls, //number of implementations available + const char *impl_name //the implementation name to find +); + +int volk_rank_archs( + const char *kern_name, //name of the kernel to rank + const char *impl_names[], //list of implementations by name + const int* impl_deps, //requirement mask per implementation + const bool* alignment, //alignment status of each implementation + size_t n_impls, //number of implementations available + const bool align //if false, filter aligned implementations +); #ifdef __cplusplus } |