summaryrefslogtreecommitdiff
path: root/volk/lib/qa_32f_index_max_aligned16.cc
diff options
context:
space:
mode:
authorTom Rondeau2010-12-07 18:50:28 -0500
committerTom Rondeau2010-12-07 18:50:28 -0500
commit239144659b29c0a5ecd83a34e0e57387a1060ed7 (patch)
tree3476e1c123da4696c64cc1756ddec5d971bcf9f2 /volk/lib/qa_32f_index_max_aligned16.cc
parente13783aeb84a2c3656c3344a8d52fa2c9ee38a00 (diff)
downloadgnuradio-239144659b29c0a5ecd83a34e0e57387a1060ed7.tar.gz
gnuradio-239144659b29c0a5ecd83a34e0e57387a1060ed7.tar.bz2
gnuradio-239144659b29c0a5ecd83a34e0e57387a1060ed7.zip
Initial checkin for VOLK - Vector-Optimized Library of Kernels. This is a new SIMD library.
It currently stands by itself under the GNU Radio tree and can be used separately. We will integrate the build process into GNU Radio and start building on its functionality over time.
Diffstat (limited to 'volk/lib/qa_32f_index_max_aligned16.cc')
-rw-r--r--volk/lib/qa_32f_index_max_aligned16.cc103
1 files changed, 103 insertions, 0 deletions
diff --git a/volk/lib/qa_32f_index_max_aligned16.cc b/volk/lib/qa_32f_index_max_aligned16.cc
new file mode 100644
index 000000000..a1c3d4cd1
--- /dev/null
+++ b/volk/lib/qa_32f_index_max_aligned16.cc
@@ -0,0 +1,103 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_index_max_aligned16.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define ERR_DELTA (1e-4) /* tolerance constant; not referenced by the code in this file */
+#define NUM_ITERS 1000000 /* number of kernel calls made inside each timed loop */
+#define VEC_LEN 3097 /* number of floats in the randomly filled input vector */
+/* Draws one value from rand() and rescales it linearly onto [-1, 1]. */
+static float uniform() {
+  // Normalise the raw draw to [0, 1] first, then recentre and stretch it.
+  const float unit = (float) rand() / RAND_MAX;
+  return 2.0 * (unit - 0.5);
+}
+
+/* Overwrites buf[0] .. buf[n-1] with successive uniform() draws scaled by 32767. */
+static void
+random_floats (float *buf, unsigned n)
+{
+  float *const stop = buf + n;
+  for (float *cur = buf; cur != stop; ++cur) {
+    *cur = uniform () * 32767;
+  }
+}
+
+
+#ifndef LV_HAVE_SSE
+
+// Variant compiled when LV_HAVE_SSE is not defined: it only reports that no test ran.
+void qa_32f_index_max_aligned16::t1(){
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+
+/*
+ * Variant compiled when LV_HAVE_SSE is defined.
+ *
+ * Fills a 16-byte-aligned buffer of VEC_LEN random floats, then times
+ * NUM_ITERS calls of volk_32f_index_max_aligned16 three ways: the manual
+ * "generic" implementation, the manual "sse2" implementation, and whatever
+ * the runtime dispatcher returns. Finally asserts that all three wrote the
+ * same index.
+ */
+void qa_32f_index_max_aligned16::t1(){
+
+  const int vlen = VEC_LEN;
+
+  volk_runtime_init();
+  volk_environment_init();
+
+  // One result slot per implementation, zeroed so that a kernel which never
+  // writes its output cannot make the test print or compare garbage.
+  unsigned int i_target_sse4_1 = 0;
+  unsigned int i_target_sse = 0;
+  unsigned int i_target_generic = 0;
+  unsigned int* target_sse4_1 = &i_target_sse4_1;
+  unsigned int* target_sse = &i_target_sse;
+  unsigned int* target_generic = &i_target_generic;
+
+  float* src0 = NULL;
+  int ret = posix_memalign((void**)&src0, 16, vlen * sizeof(float));
+  // On failure posix_memalign does not give us a usable buffer, so stop here
+  // instead of writing through src0.
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  random_floats(src0, vlen);
+
+  printf("32f_index_max_aligned16\n");
+
+  clock_t start, end;
+  double total;
+
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic time: %f\n", total);
+
+  // NOTE(review): the output label says "sse" while the implementation
+  // requested is "sse2" -- confirm which name is intended.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse time: %f\n", total);
+
+  // NOTE(review): this goes through the runtime dispatcher; it is labelled
+  // sse4.1 presumably because that is the implementation expected to be
+  // selected -- confirm on the target machine.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4.1 time: %f\n", total);
+
+  printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
+
+  // Release the buffer before asserting: a failing CppUnit assertion throws,
+  // which would skip a free() placed after it. The results live in locals,
+  // so the buffer is no longer needed.
+  free(src0);
+  src0 = NULL;
+
+  CPPUNIT_ASSERT_EQUAL(i_target_generic, i_target_sse);
+  CPPUNIT_ASSERT_EQUAL(i_target_generic, i_target_sse4_1);
+}
+
+#endif /*LV_HAVE_SSE*/