diff options
author | Tom Rondeau | 2010-12-07 18:50:28 -0500 |
---|---|---|
committer | Tom Rondeau | 2010-12-07 18:50:28 -0500 |
commit | 239144659b29c0a5ecd83a34e0e57387a1060ed7 (patch) | |
tree | 3476e1c123da4696c64cc1756ddec5d971bcf9f2 /volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | |
parent | e13783aeb84a2c3656c3344a8d52fa2c9ee38a00 (diff) | |
download | gnuradio-239144659b29c0a5ecd83a34e0e57387a1060ed7.tar.gz gnuradio-239144659b29c0a5ecd83a34e0e57387a1060ed7.tar.bz2 gnuradio-239144659b29c0a5ecd83a34e0e57387a1060ed7.zip |
Initial checkin for VOLK - Vector-Optimized Library of Kernels. This is a new SIMD library.
It currently stands by itself under the GNU Radio tree and can be used separately. We will integrate the build process into GNU Radio and start building off of its functionality over time.
Diffstat (limited to 'volk/lib/qa_16sc_deinterleave_16s_aligned16.cc')
-rw-r--r-- | volk/lib/qa_16sc_deinterleave_16s_aligned16.cc | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc new file mode 100644 index 000000000..e700ac72c --- /dev/null +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc @@ -0,0 +1,76 @@ +#include <volk/volk.h> +#include <qa_16sc_deinterleave_16s_aligned16.h> +#include <volk/volk_16sc_deinterleave_16s_aligned16.h> +#include <cstdlib> + +//test for sse + +#ifndef LV_HAVE_SSSE3 + +void qa_16sc_deinterleave_16s_aligned16::t1() { + printf("ssse3 not available... no test performed\n"); +} + +#else + +void qa_16sc_deinterleave_16s_aligned16::t1() { + + volk_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 100000; + std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); + + int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_generic1[vlen] __attribute__ ((aligned (16))); + int16_t output_sse2[vlen] __attribute__ ((aligned (16))); + int16_t output_sse21[vlen] __attribute__ ((aligned (16))); + int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); + int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); + + int16_t* loadInput = (int16_t*)input0; + for(int i = 0; i < vlen*2; ++i) { + loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0)); + } + printf("16sc_deinterleave_16s_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + + start = clock(); + for(int 
count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("ssse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_sse21[i]); + + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); + } +} + +#endif |