diff options
Diffstat (limited to 'volk/lib')
-rw-r--r-- | volk/lib/gcc_x86_cpuid.h | 6 | ||||
-rw-r--r-- | volk/lib/qa_16s_add_quad_aligned16.cc | 10 | ||||
-rw-r--r-- | volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 38 | ||||
-rw-r--r-- | volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc | 18 | ||||
-rw-r--r-- | volk/lib/qa_16s_quad_max_star_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_32f_fm_detect_aligned16.cc | 8 | ||||
-rw-r--r-- | volk/lib/qa_32f_index_max_aligned16.cc | 36 | ||||
-rw-r--r-- | volk/lib/qa_32fc_index_max_aligned16.cc | 34 | ||||
-rw-r--r-- | volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc | 8 | ||||
-rw-r--r-- | volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc | 28 | ||||
-rw-r--r-- | volk/lib/qa_32u_popcnt_aligned16.cc | 8 | ||||
-rw-r--r-- | volk/lib/qa_64u_popcnt_aligned16.cc | 8 | ||||
-rw-r--r-- | volk/lib/qa_utils.cc | 54 | ||||
-rw-r--r-- | volk/lib/volk_prefs.c | 2 | ||||
-rw-r--r-- | volk/lib/volk_rank_archs.c | 4 |
15 files changed, 134 insertions, 134 deletions
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h index 2d0916fb3..98eeb33a3 100644 --- a/volk/lib/gcc_x86_cpuid.h +++ b/volk/lib/gcc_x86_cpuid.h @@ -5,16 +5,16 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 3, or (at your option) any * later version. - * + * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * + * * Under Section 7 of GPL version 3, you are granted additional * permissions described in the GCC Runtime Library Exception, version * 3.1, as published by the Free Software Foundation. - * + * * You should have received a copy of the GNU General Public License and * a copy of the GCC Runtime Library Exception along with this program; * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index 5d5eb7e18..8da43b972 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -16,7 +16,7 @@ void qa_16s_add_quad_aligned16::t1() { void qa_16s_add_quad_aligned16::t1() { - + volk_environment_init(); clock_t start, end; double total; @@ -27,7 +27,7 @@ void qa_16s_add_quad_aligned16::t1() { __VOLK_ATTR_ALIGNED(16) short input2[vlen]; __VOLK_ATTR_ALIGNED(16) short input3[vlen]; __VOLK_ATTR_ALIGNED(16) short input4[vlen]; - + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; __VOLK_ATTR_ALIGNED(16) short output1[vlen]; __VOLK_ATTR_ALIGNED(16) short output2[vlen]; @@ -48,13 +48,13 @@ void qa_16s_add_quad_aligned16::t1() { short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; - + input0[i] = plus0 - minus0; input1[i] = plus1 - minus1; input2[i] = plus2 - minus2; input3[i] = plus3 - minus3; input4[i] = plus4 - minus4; - + } printf("16s_add_quad_aligned\n"); @@ -76,7 +76,7 @@ void qa_16s_add_quad_aligned16::t1() { //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); } - + for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index 2e6e6a1a0..5a58569a1 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -22,17 +22,17 @@ void qa_16s_branch_4_state_8_aligned16::t1() { static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f}; static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d}; static char* permuters[4] = {permute0, permute1, permute2, permute3}; - + unsigned int num_bytes = vlen << 1; volk_environment_init(); clock_t start, end; double total; - + __VOLK_ATTR_ALIGNED(16) short target[vlen]; __VOLK_ATTR_ALIGNED(16) short target2[vlen]; __VOLK_ATTR_ALIGNED(16) short target3[vlen]; - + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; @@ -45,29 +45,29 @@ void qa_16s_branch_4_state_8_aligned16::t1() { __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; - - + + for(int i = 0; i < vlen; ++i) { src0[i] = i; - + } - + printf("16s_branch_4_state_8_aligned\n"); - - + + start = clock(); for(int i = 0; i < num_iters; ++i) { volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); } end = clock(); - + total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("permute_and_scalar_add_time: %f\n", total); - - + + start = clock(); for(int i = 0; i < num_iters; ++i) { @@ -78,25 +78,25 @@ void qa_16s_branch_4_state_8_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("branch_4_state_8_time, ssse3: %f\n", total); - + start = clock(); for(int i = 0; i < num_iters; ++i) { volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic"); } end = clock(); - + total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("permute_and_scalar_add_time, generic: %f\n", total); - - - + + + for(int i = 0; i < vlen; ++i) { printf("psa... %d, b4s8... %d\n", target[i], target3[i]); } - + for(int i = 0; i < vlen; ++i) { - + CPPUNIT_ASSERT(target[i] == target2[i]); CPPUNIT_ASSERT(target[i] == target3[i]); } diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 3cd4e906d..dadd2c580 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -16,13 +16,13 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { void qa_16s_permute_and_scalar_add_aligned16::t1() { const int vlen = 64; - + unsigned int num_bytes = vlen << 1; volk_environment_init(); clock_t start, end; double total; - + __VOLK_ATTR_ALIGNED(16) short target[vlen]; __VOLK_ATTR_ALIGNED(16) short target2[vlen]; __VOLK_ATTR_ALIGNED(16) short src0[vlen]; @@ -43,7 +43,7 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { } printf("16s_permute_and_scalar_add_aligned\n"); - + start = clock(); for(int i = 0; i < 100000; ++i) { volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic"); @@ -53,24 +53,24 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); - + start = clock(); for(int i = 0; i < 100000; ++i) { volk_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); } end = clock(); - + total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse2_time: %f\n", total); - - + + for(int i = 0; i < vlen; ++i) { //printf("generic... %d, sse2... %d\n", target[i], target2[i]); } - + for(int i = 0; i < vlen; ++i) { - + CPPUNIT_ASSERT(target[i] == target2[i]); } } diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 192a69e35..2a5dec44a 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -16,7 +16,7 @@ void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1() { const int vlen = 34; - + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; __VOLK_ATTR_ALIGNED(16) short input1[vlen]; __VOLK_ATTR_ALIGNED(16) short input2[vlen]; @@ -50,9 +50,9 @@ void qa_16s_quad_max_star_aligned16::t1() { for(int i = 0; i < vlen; ++i) { printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]); } - + for(int i = 0; i < vlen; ++i) { - + CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); } } diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index a2e7a85be..4e792ec6c 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -15,18 +15,18 @@ void qa_32f_fm_detect_aligned16::t1() { #else void qa_32f_fm_detect_aligned16::t1() { - + volk_environment_init(); clock_t start, end; double total; const int vlen = 3201; const int ITERS = 10000; __VOLK_ATTR_ALIGNED(16) float input0[vlen]; - + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; __VOLK_ATTR_ALIGNED(16) float output01[vlen]; - for(int i = 0; i < vlen; ++i) { + for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); } printf("32f_fm_detect_aligned\n"); @@ -51,7 +51,7 @@ void qa_32f_fm_detect_aligned16::t1() { //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); } - + for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4); diff --git a/volk/lib/qa_32f_index_max_aligned16.cc b/volk/lib/qa_32f_index_max_aligned16.cc index a1c3d4cd1..2df206726 100644 --- a/volk/lib/qa_32f_index_max_aligned16.cc +++ b/volk/lib/qa_32f_index_max_aligned16.cc @@ -34,12 +34,12 @@ void qa_32f_index_max_aligned16::t1(){ void qa_32f_index_max_aligned16::t1(){ - + const int vlen = VEC_LEN; - + volk_runtime_init(); - + volk_environment_init(); int ret; @@ -47,8 +47,8 @@ void qa_32f_index_max_aligned16::t1(){ unsigned int* target_sse; unsigned int* target_generic; float* src0 ; - - + + unsigned int i_target_sse4_1; target_sse4_1 = &i_target_sse4_1; unsigned int i_target_sse; @@ -57,20 +57,20 @@ void qa_32f_index_max_aligned16::t1(){ target_generic = &i_target_generic; ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float)); - + random_floats((float*)src0, vlen); - + printf("32f_index_max_aligned16\n"); clock_t start, end; double total; - - + + start = clock(); for(int k = 0; k < NUM_ITERS; ++k) { volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic"); } - end = clock(); + end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic time: %f\n", total); @@ -78,25 +78,25 @@ void qa_32f_index_max_aligned16::t1(){ for(int k = 0; k < NUM_ITERS; ++k) { volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2"); } - - end = clock(); + + end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse time: %f\n", total); - + start = clock(); for(int k = 0; k < NUM_ITERS; ++k) { get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen); } - - end = clock(); + + end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse4.1 time: %f\n", total); - - + + printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]); CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]); CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]); - + free(src0); } diff --git a/volk/lib/qa_32fc_index_max_aligned16.cc b/volk/lib/qa_32fc_index_max_aligned16.cc index 4d83f1639..3859bcb52 100644 --- a/volk/lib/qa_32fc_index_max_aligned16.cc +++ b/volk/lib/qa_32fc_index_max_aligned16.cc @@ -33,36 +33,36 @@ void qa_32fc_index_max_aligned16::t1(){ void qa_32fc_index_max_aligned16::t1(){ - + const int vlen = VEC_LEN; - + volk_environment_init(); int ret; - + unsigned int* target; unsigned int* target_generic; std::complex<float>* src0 ; - - + + unsigned int i_target; target = &i_target; unsigned int i_target_generic; target_generic = &i_target_generic; ret = posix_memalign((void**)&src0, 16, vlen << 3); - + random_floats((float*)src0, vlen * 2); - + printf("32fc_index_max_aligned16\n"); clock_t start, end; double total; - - + + start = clock(); for(int k = 0; k < NUM_ITERS; ++k) { volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic"); } - end = clock(); + end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic time: %f\n", total); @@ -70,19 +70,19 @@ void qa_32fc_index_max_aligned16::t1(){ for(int k = 0; k < NUM_ITERS; ++k) { volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3"); } - - end = clock(); + + end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3 time: %f\n", total); - - - + + + printf("generic: %u, sse3: %u\n", target_generic[0], target[0]); CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1); - - + + free(src0); } diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index 981bb19e6..daca31d9c 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -15,14 +15,14 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { #else void qa_32fc_power_spectral_density_32f_aligned16::t1() { - + volk_environment_init(); clock_t start, end; double total; const int vlen = 3201; const int ITERS = 10000; __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen]; - + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; @@ -30,7 +30,7 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { const float rbw = 1.7; float* inputLoad = (float*)input0; - for(int i = 0; i < 2*vlen; ++i) { + for(int i = 0; i < 2*vlen; ++i) { inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); } printf("32fc_power_spectral_density_32f_aligned\n"); @@ -54,7 +54,7 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); } - + for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc index fefdf06ee..b825c20e4 100644 --- a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc +++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc @@ -7,7 +7,7 @@ #define assertcomplexEqual(expected, actual, delta) \ CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); #define ERR_DELTA (1e-4) @@ -35,7 +35,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { std::complex<float>* input; std::complex<float>* taps; - + std::complex<float>* result_generic; std::complex<float>* result; @@ -43,19 +43,19 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { ret = posix_memalign((void**)&taps, 16, vlen << 3); ret = posix_memalign((void**)&result_generic, 16, 8); ret = posix_memalign((void**)&result, 16, 8); - + result_generic[0] = std::complex<float>(0,0); result[0] = std::complex<float>(0,0); random_floats((float*)input, vlen * 2); random_floats((float*)taps, vlen * 2); - - + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic"); - + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse"); printf("32fc_x2_conjugate_dot_prod_32fc_u\n"); @@ -67,7 +67,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { free(taps); free(result_generic); free(result); - + } @@ -87,13 +87,13 @@ random_floats (float *buf, unsigned n) void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { const int vlen = 789743; - + volk_environment_init(); int ret; std::complex<float>* input; std::complex<float>* taps; - + std::complex<float>* result_generic; std::complex<float>* result; @@ -101,19 +101,19 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { ret = posix_memalign((void**)&taps, 16, vlen << 3); ret = posix_memalign((void**)&result_generic, 16, 8); ret = posix_memalign((void**)&result, 16, 8); - + result_generic[0] = std::complex<float>(0,0); result[0] = std::complex<float>(0,0); random_floats((float*)input, vlen * 2); random_floats((float*)taps, vlen * 2); - - + + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic"); - + volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32"); printf("32fc_x2_conjugate_dot_prod_32fc_u\n"); @@ -125,7 +125,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() { free(taps); free(result_generic); free(result); - + } diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index c880260f2..5559d933d 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -16,8 +16,8 @@ void qa_32u_popcnt_aligned16::t1() { #else void qa_32u_popcnt_aligned16::t1() { - - + + volk_runtime_init(); volk_environment_init(); @@ -26,7 +26,7 @@ void qa_32u_popcnt_aligned16::t1() { const int ITERS = 10000000; __VOLK_ATTR_ALIGNED(16) uint32_t input0; - + __VOLK_ATTR_ALIGNED(16) uint32_t output0; __VOLK_ATTR_ALIGNED(16) uint32_t output01; @@ -55,7 +55,7 @@ void qa_32u_popcnt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse4.2_time: %f\n", total); - + CPPUNIT_ASSERT_EQUAL(output0, output01); } diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index 6be4e50ea..391601f22 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -16,8 +16,8 @@ void qa_64u_popcnt_aligned16::t1() { #else void qa_64u_popcnt_aligned16::t1() { - - + + volk_runtime_init(); volk_environment_init(); @@ -26,7 +26,7 @@ void qa_64u_popcnt_aligned16::t1() { const int ITERS = 10000000; __VOLK_ATTR_ALIGNED(16) uint64_t input0; - + __VOLK_ATTR_ALIGNED(16) uint64_t output0; __VOLK_ATTR_ALIGNED(16) uint64_t output01; @@ -55,7 +55,7 @@ void qa_64u_popcnt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse4.2_time: %f\n", total); - + CPPUNIT_ASSERT_EQUAL(output0, output01); } diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index bb37801c9..c15979b3f 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -46,7 +46,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { case 4: if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; - break; + break; case 2: if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; @@ -69,7 +69,7 @@ static std::vector<std::string> get_arch_list(struct volk_func_desc desc) { //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc archlist.push_back(std::string(desc.indices[i])); } - + return archlist; } @@ -81,15 +81,15 @@ volk_type_t volk_type_from_string(std::string name) { type.is_signed = false; type.size = 0; type.str = name; - + if(name.size() < 2) throw std::string("name too short to be a datatype"); - + //is it a scalar? - if(name[0] == 's') { + if(name[0] == 's') { type.is_scalar = true; name = name.substr(1, name.size()-1); } - + //get the data size size_t last_size_pos = name.find_last_of("0123456789"); if(last_size_pos < 0) throw std::string("no size spec in type ").append(name); @@ -98,7 +98,7 @@ volk_type_t volk_type_from_string(std::string name) { assert(((size % 8) == 0) && (size <= 64) && (size != 0)); type.size = size/8; //in bytes - + for(size_t i=last_size_pos+1; i < name.size(); i++) { switch (name[i]) { case 'f': @@ -117,19 +117,19 @@ volk_type_t volk_type_from_string(std::string name) { throw; } } - + return type; } -static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, - std::vector<volk_type_t> &outputsig, +static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, + std::vector<volk_type_t> &outputsig, std::string name) { boost::char_separator<char> sep("_"); boost::tokenizer<boost::char_separator<char> > tok(name, sep); std::vector<std::string> toked; tok.assign(name); toked.assign(tok.begin(), tok.end()); - + assert(toked[0] == "volk"); toked.erase(toked.begin()); @@ -143,7 +143,7 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, try { type = volk_type_from_string(token); if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... - + if(side == SIDE_INPUT) inputsig.push_back(type); else outputsig.push_back(type); } catch (...){ @@ -160,7 +160,7 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, side = SIDE_NAME; fn_name.append("_"); fn_name.append(token); - } + } else if(side == SIDE_OUTPUT) { if(token != toked.back()) throw; //the last token in the name is the alignment } @@ -223,7 +223,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { } } } - + return fail; } @@ -239,7 +239,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { } } } - + return fail; } @@ -264,10 +264,10 @@ bool run_volk_tests(struct volk_func_desc desc, std::vector<std::string> *best_arch_vector = 0 ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; - + //first let's get a list of available architectures for the test std::vector<std::string> arch_list = get_arch_list(desc); - + if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; return false; @@ -279,7 +279,7 @@ bool run_volk_tests(struct volk_func_desc desc, //now we have to get a function signature by parsing the name std::vector<volk_type_t> inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); - + //pull the input scalars into their own vector std::vector<volk_type_t> inputsc; for(size_t i=0; i<inputsig.size(); i++) { @@ -299,7 +299,7 @@ bool run_volk_tests(struct volk_func_desc desc, for(size_t i=0; i<inbuffs.size(); i++) { load_random_data(inbuffs[i], inputsig[i], vlen); } - + //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch std::vector<std::vector<void *> > test_data; for(size_t i=0; i<arch_list.size(); i++) { @@ -312,7 +312,7 @@ bool run_volk_tests(struct volk_func_desc desc, } test_data.push_back(arch_buffs); } - + std::vector<volk_type_t> both_sigs; both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); @@ -326,7 +326,7 @@ bool run_volk_tests(struct volk_func_desc desc, switch(both_sigs.size()) { case 1: if(inputsc.size() == 0) { - run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { if(inputsc[0].is_complex) { run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); @@ -364,23 +364,23 @@ bool run_volk_tests(struct volk_func_desc desc, throw "no function handler for this signature"; break; } - + end = clock(); double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; profile_times.push_back(arch_time); } - + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... size_t generic_offset=0; - for(size_t i=0; i<arch_list.size(); i++) + for(size_t i=0; i<arch_list.size(); i++) if(arch_list[i] == "generic") generic_offset=i; //now compare //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know - + bool fail = false; bool fail_global = false; std::vector<bool> arch_results; @@ -438,7 +438,7 @@ bool run_volk_tests(struct volk_func_desc desc, } arch_results.push_back(!fail); } - + double best_time = std::numeric_limits<double>::max(); std::string best_arch = "generic"; for(size_t i=0; i < arch_list.size(); i++) { @@ -447,7 +447,7 @@ bool run_volk_tests(struct volk_func_desc desc, best_arch = arch_list[i]; } } - + std::cout << "Best arch: " << best_arch << std::endl; if(best_arch_vector) { best_arch_vector->push_back(name + std::string(" ") + best_arch); diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c index 7e705bed4..5e5c9dfff 100644 --- a/volk/lib/volk_prefs.c +++ b/volk/lib/volk_prefs.c @@ -26,7 +26,7 @@ int load_preferences(struct volk_arch_pref **prefs) { char path[512], line[512], function[128], arch[32]; int n_arch_prefs = 0; struct volk_arch_pref *t_pref; - + //get the config path get_config_path(path); if (path == NULL) return n_arch_prefs; //no prefs found diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 4baa078bc..865d60955 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -26,14 +26,14 @@ unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsign n_arch_prefs = load_preferences(&volk_arch_prefs); prefs_loaded = 1; } - + //now look for the function name in the prefs list for(i=0; i < n_arch_prefs; i++) { if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it return get_index(indices, n_archs, volk_arch_prefs[i].arch); } } - + for(i=1; i < n_archs; ++i) { if((arch_defs[i]&(!arch)) == 0) { best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; |