diff options
Diffstat (limited to 'volk/lib')
-rw-r--r-- | volk/lib/Makefile.am | 2 | ||||
-rw-r--r-- | volk/lib/qa_32f_normalize_aligned16.cc | 13 | ||||
-rw-r--r-- | volk/lib/qa_32fc_32f_multiply_aligned16.cc | 84 | ||||
-rw-r--r-- | volk/lib/qa_32fc_multiply_aligned16.cc | 12 |
4 files changed, 64 insertions, 47 deletions
diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 253033461..0aeafe4aa 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -156,6 +156,7 @@ endif # ---------------------------------------------------------------- libvolk_qa_la_SOURCES = \ qa_volk.cc \ + qa_utils.cc \ qa_16s_quad_max_star_aligned16.cc \ qa_32fc_dot_prod_aligned16.cc \ qa_32fc_square_dist_aligned16.cc \ @@ -257,6 +258,7 @@ libvolk_qa_la_LIBADD = \ noinst_HEADERS = \ volk_init.h \ qa_volk.h \ + qa_utils.h \ assembly.h \ qa_16s_quad_max_star_aligned16.h \ qa_32fc_dot_prod_aligned16.h \ diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc index 1c7b485a6..0da43ecff 100644 --- a/volk/lib/qa_32f_normalize_aligned16.cc +++ b/volk/lib/qa_32f_normalize_aligned16.cc @@ -26,13 +26,16 @@ void qa_32f_normalize_aligned16::t1() { float* output0; float* output01; + float* output02; ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float)); ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float)); + ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float)); for(int i = 0; i < vlen; ++i) { output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); } memcpy(output01, output0, vlen*sizeof(float)); + memcpy(output02, output0, vlen*sizeof(float)); printf("32f_normalize_aligned\n"); start = clock(); @@ -49,6 +52,14 @@ void qa_32f_normalize_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + for(int i = 0; i < 1; ++i) { //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); @@ -57,10 +68,12 @@ void qa_32f_normalize_aligned16::t1() { for(int i = 0; i < vlen; ++i) { // printf("%e...%e\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4); } free(output0); free(output01); + free(output02); } #endif diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc index 4eba0a3cd..7bb8d21c1 100644 --- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc @@ -2,28 +2,12 @@ #include <volk/volk.h> #include <qa_32fc_32f_multiply_aligned16.h> #include <stdlib.h> -#include <math.h> #include <time.h> - -#define assertcomplexEqual(expected, actual, delta) \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ - CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); +#include <string.h> +#include <qa_utils.h> #define ERR_DELTA (1e-4) -//test for sse -static float uniform() { - return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) -} - -static void -random_floats (float *buf, unsigned n) -{ - for (unsigned i = 0; i < n; i++) - buf[i] = uniform (); -} - -#ifdef LV_HAVE_SSE3 void qa_32fc_32f_multiply_aligned16::t1() { const int vlen = 2046; @@ -36,50 +20,56 @@ void qa_32fc_32f_multiply_aligned16::t1() { std::complex<float>* input; float * taps; int i; + std::vector<std::string> archs; + archs.push_back("generic"); +#ifdef LV_HAVE_SSE3 + archs.push_back("sse3"); +#endif +#ifdef LV_HAVE_ORC + archs.push_back("orc"); +#endif - std::complex<float>* result_generic; - std::complex<float>* result_sse3; + std::vector<std::complex<float>* > results; ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float)); ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float)); - ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float)); - ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float)); + + for(i=0; i < archs.size(); i++) { + std::complex<float> *ptr; + ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float)); + if(ret) { + printf("Couldn't allocate memory\n"); + exit(1); + } + results.push_back(ptr); + } random_floats((float*)input, vlen * 2); random_floats(taps, vlen); printf("32fc_32f_multiply_aligned16\n"); - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); - } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("generic_time: %f\n", total); - - start = clock(); - for(int count = 0; count < ITERS; ++count) { - volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); + for(i=0; i < archs.size(); i++) { + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str()); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("%s_time: %f\n", archs[i].c_str(), total); } - end = clock(); - total = (double)(end-start)/(double)CLOCKS_PER_SEC; - printf("sse3_time: %f\n", total); - for(i = 0; i < vlen; i++){ - assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + for(i=0; i < vlen; i++) { + int j = 1; + for(j; j < archs.size(); j++) { + assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA); + } } free(input); free(taps); - free(result_generic); - free(result_sse3); - -} -#else -void qa_32fc_32f_multiply_aligned16::t1() { - printf("sse3 not available... no test performed\n"); + for(i=0; i < archs.size(); i++) { + free(results[i]); + } } -#endif /* LV_HAVE_SSE3 */ - diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc index e1f7eab3d..022b58ad6 100644 --- a/volk/lib/qa_32fc_multiply_aligned16.cc +++ b/volk/lib/qa_32fc_multiply_aligned16.cc @@ -41,11 +41,13 @@ void qa_32fc_multiply_aligned16::t1() { std::complex<float>* result_generic; std::complex<float>* result_sse3; + std::complex<float>* result_orc; ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float)); random_floats((float*)input, vlen * 2); random_floats((float*)taps, vlen * 2); @@ -67,15 +69,25 @@ void qa_32fc_multiply_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); for(i = 0; i < vlen; i++){ assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA); } free(input); free(taps); free(result_generic); free(result_sse3); + free(result_orc); } #else |