From 239144659b29c0a5ecd83a34e0e57387a1060ed7 Mon Sep 17 00:00:00 2001 From: Tom Rondeau Date: Tue, 7 Dec 2010 18:50:28 -0500 Subject: Initial checkin for VOLK - Vector-Optimized Library of Kernels. This is a new SIMD library. It currently stands by itself under the GNU Radio tree and can be used separately. We will integrate the build process into GNU Raio and start building off of its functionality over time. --- volk/lib/qa_32fc_multiply_aligned16.cc | 86 ++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc (limited to 'volk/lib/qa_32fc_multiply_aligned16.cc') diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc new file mode 100644 index 000000000..e1f7eab3d --- /dev/null +++ b/volk/lib/qa_32fc_multiply_aligned16.cc @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include + + + +#define assertcomplexEqual(expected, actual, delta) \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \ + CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta); + +#define ERR_DELTA (1e-3) + +//test for sse +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +#ifdef LV_HAVE_SSE3 +void qa_32fc_multiply_aligned16::t1() { + + const int vlen = 2046; + const int ITERS = 100000; + + int i; + volk_environment_init(); + int ret; + clock_t start, end; + double total; + std::complex* input; + std::complex* taps; + + std::complex* result_generic; + std::complex* result_sse3; + + ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float)); + ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float)); + + random_floats((float*)input, vlen * 2); + random_floats((float*)taps, vlen * 2); + + printf("32fc_multiply_aligned16\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(i = 0; i < vlen; i++){ + assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA); + } + + free(input); + free(taps); + free(result_generic); + free(result_sse3); + +} +#else +void qa_32fc_multiply_aligned16::t1() { + printf("sse3 not available... no test performed\n"); +} + +#endif /* LV_HAVE_SSE3 */ -- cgit