From 239144659b29c0a5ecd83a34e0e57387a1060ed7 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Dec 2010 18:50:28 -0500 Subject: Initial checkin for VOLK - Vector-Optimized Library of Kernels. This is a new SIMD library. It currently stands by itself under the GNU Radio tree and can be used separately. We will integrate the build process into GNU Raio and start building off of its functionality over time. --- .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 123 +++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc (limited to 'volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc') diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc new file mode 100644 index 000000000..0f4ba6923 --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE4_1 + +#ifndef LV_HAVE_SSE + +void qa_16sc_deinterleave_real_32f_aligned16::t1() { + printf("sse not available... no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_real_32f_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0)); + } + printf("16sc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + } +} + +#endif /* SSE */ + +#else + +void qa_16sc_deinterleave_real_32f_aligned16::t1() { + + volk_runtime_init(); + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex input0[vlen] __attribute__ ((aligned (16))); + + float output_generic[vlen] __attribute__ ((aligned (16))); + float output_sse[vlen] __attribute__ ((aligned (16))); + float output_sse4_1[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * 32768.0); + } + printf("16sc_deinterleave_real_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4_1_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4); + } +} + +#endif /* SSE4_1 */ -- cgit From ce3e4c33d170b65cf288faec7d8da6a496eb6101 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Thu, 16 Dec 2010 21:33:54 -0500 Subject: Including time header to qa files. --- volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 1 + 1 file changed, 1 insertion(+) (limited to 'volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc') diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc index 0f4ba6923..f86f03b88 100644 --- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc @@ -3,6 +3,7 @@ #include #include #include +#include //test for sse -- cgit