summaryrefslogtreecommitdiff
path: root/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
diff options
context:
space:
mode:
author: Johnathan Corgan, 2011-03-16 08:18:30 -0700
committer: Johnathan Corgan, 2011-03-16 08:18:30 -0700
commit: 4cd06fadac972d4cac559c15488bc39823063afe (patch)
tree: 6fe57896bc216d8c9e672194f2e4e0e75aedb645 /volk/lib/qa_16s_branch_4_state_8_aligned16.cc
parent: 4ad736e21f45f64fd616bb53f54e45e1c63e7330 (diff)
parent: 1d70ed2bd928d52a383e688949cc7f747dd584fa (diff)
downloadgnuradio-4cd06fadac972d4cac559c15488bc39823063afe.tar.gz
gnuradio-4cd06fadac972d4cac559c15488bc39823063afe.tar.bz2
gnuradio-4cd06fadac972d4cac559c15488bc39823063afe.zip
Merge remote branch 'gnuradio/next'
* gnuradio/next: (806 commits) gruel: added missing ignores gruel: fixed swig interface file to dereference pmt_t. qtgui: fix distcheck error gruel: fixing structure. Passes make check. gruel: SWIGing Gruel into Python to access PMTs. gnuradio-examples: add C++ audio examples using new gr-audio created gruel/attributes.h to house compiler specific attribute macros audio: remove obsoleted individual top-level components gr-audio: added README and default config fix volk: simplify the get new method for the aligned pool grc: moved all usrp1 and usrp2 stuff out of grc and into gr-usrp*/grc grc: swap store the subprocess object rather than the pid when executing qtgui: removed python directory that was added, never used uhd: use %ignore to hide warnings and fix errors Added/updated ignore files. Fixing gr_filter_design program to import from gnuradio Python package. audio: high prio for platform specific audio osx audio: added windows and osx audio source files audio: added config checks for other audios, added jack and port audio: make prefs look like old audio, removed old audio.py ...
Diffstat (limited to 'volk/lib/qa_16s_branch_4_state_8_aligned16.cc')
-rw-r--r-- volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 106
1 files changed, 106 insertions, 0 deletions
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
new file mode 100644
index 000000000..62deffaeb
--- /dev/null
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -0,0 +1,106 @@
+#include <volk/volk.h>
+#include <qa_16s_branch_4_state_8_aligned16.h>
+#include <cstdlib>
+#include <ctime>
+
+//test for ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+// Stub used when LV_HAVE_SSSE3 is not defined: no kernel is exercised,
+// the test only reports on stdout that it was skipped.
+void qa_16s_branch_4_state_8_aligned16::t1()
+{
+  fputs("ssse3 not available... no test performed\n", stdout);
+}
+
+#else
+
+// Times and cross-checks the branch_4_state_8 kernel (LV_HAVE_SSSE3 build).
+//
+// Three kernel variants are each called num_iters times on the same
+// 32-element input and timed with clock():
+//   1. volk_16s_permute_and_scalar_add_aligned16_manual, "sse2"    -> target
+//   2. volk_16s_branch_4_state_8_aligned16_manual,       "ssse3"   -> target2
+//   3. volk_16s_branch_4_state_8_aligned16_manual,       "generic" -> target3
+// The elapsed time of each loop is printed, then every element of target2
+// and target3 is asserted equal to the corresponding element of target.
+void qa_16s_branch_4_state_8_aligned16::t1() {
+  const int num_iters = 1000000;
+  const int vlen = 32;
+
+  // 16-byte tables handed to the branch_4_state_8 kernel through permuters.
+  // NOTE(review): presumably byte-shuffle masks; confirm against the kernel.
+  static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
+  static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
+  static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
+  static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
+  static char* permuters[4] = {permute0, permute1, permute2, permute3};
+
+  // Size in bytes of one vlen-element buffer of shorts (vlen * 2).
+  unsigned int num_bytes = static_cast<unsigned int>(vlen * sizeof(short));
+
+  volk_environment_init();
+
+  // One output buffer per kernel variant under test.
+  short target[vlen] __attribute__ ((aligned (16)));
+  short target2[vlen] __attribute__ ((aligned (16)));
+  short target3[vlen] __attribute__ ((aligned (16)));
+
+  short src0[vlen] __attribute__ ((aligned (16)));
+
+  // Only the permute_and_scalar_add call consumes permute_indexes.
+  short permute_indexes[vlen] __attribute__ ((aligned (16))) = {
+    7, 5, 2, 0, 6, 4, 3, 1,
+    6, 4, 3, 1, 7, 5, 2, 0,
+    1, 3, 4, 6, 0, 2, 5, 7,
+    0, 2, 5, 7, 1, 3, 4, 6 };
+
+  // Control words are either all bits set or all bits clear. 0xffff does not
+  // fit in a signed short, so writing it directly inside a brace initializer
+  // is a narrowing conversion (ill-formed since C++11); convert it explicitly
+  // once and reuse the named constants.
+  const short on = (short)0xffff;
+  const short off = 0x0000;
+
+  // cntl0 and cntl1 are only passed to permute_and_scalar_add.
+  short cntl0[vlen] __attribute__ ((aligned (16))) = {
+    on,  on,  on,  on,  on,  on,  on,  on,
+    off, off, off, off, off, off, off, off,
+    on,  on,  on,  on,  on,  on,  on,  on,
+    off, off, off, off, off, off, off, off };
+  short cntl1[vlen] __attribute__ ((aligned (16))) = {
+    on,  on,  on,  on,  on,  on,  on,  on,
+    on,  on,  on,  on,  on,  on,  on,  on,
+    off, off, off, off, off, off, off, off,
+    off, off, off, off, off, off, off, off };
+  // cntl2, cntl3 and scalars are passed to all three kernel calls.
+  short cntl2[vlen] __attribute__ ((aligned (16))) = {
+    off, on,  on,  off, off, on,  on,  off,
+    on,  off, off, on,  on,  off, off, on,
+    on,  off, off, on,  on,  off, off, on,
+    off, on,  on,  off, off, on,  on,  off };
+  short cntl3[vlen] __attribute__ ((aligned (16))) = {
+    on,  on,  off, off, on,  on,  off, off,
+    off, off, on,  on,  off, off, on,  on,
+    on,  on,  off, off, on,  on,  off, off,
+    off, off, on,  on,  off, off, on,  on };
+  short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+
+  // Input is simply 0, 1, ..., vlen-1.
+  for(int i = 0; i < vlen; ++i) {
+    src0[i] = i;
+  }
+
+  printf("16s_branch_4_state_8_aligned\n");
+
+  // Reference run: permute_and_scalar_add, sse2 variant.
+  clock_t start = clock();
+  for(int i = 0; i < num_iters; ++i) {
+    volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
+  }
+  clock_t end = clock();
+
+  double total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("permute_and_scalar_add_time: %f\n", total);
+
+  // branch_4_state_8, ssse3 variant.
+  start = clock();
+  for(int i = 0; i < num_iters; ++i) {
+    volk_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
+  }
+  end = clock();
+
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("branch_4_state_8_time, ssse3: %f\n", total);
+
+  // branch_4_state_8, generic variant.
+  start = clock();
+  for(int i = 0; i < num_iters; ++i) {
+    volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
+  }
+  end = clock();
+
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  // This loop times branch_4_state_8, so label it as such (the message
+  // previously claimed to report permute_and_scalar_add).
+  printf("branch_4_state_8_time, generic: %f\n", total);
+
+  // Dump the reference output next to the generic branch_4_state_8 output.
+  for(int i = 0; i < vlen; ++i) {
+    printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
+  }
+
+  // Both branch_4_state_8 variants must match the reference element-wise.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT(target[i] == target2[i]);
+    CPPUNIT_ASSERT(target[i] == target3[i]);
+  }
+}
+
+
+#endif