diff options
author | Johnathan Corgan | 2011-03-16 08:18:30 -0700 |
---|---|---|
committer | Johnathan Corgan | 2011-03-16 08:18:30 -0700 |
commit | 4cd06fadac972d4cac559c15488bc39823063afe (patch) | |
tree | 6fe57896bc216d8c9e672194f2e4e0e75aedb645 /volk/lib/qa_16s_branch_4_state_8_aligned16.cc | |
parent | 4ad736e21f45f64fd616bb53f54e45e1c63e7330 (diff) | |
parent | 1d70ed2bd928d52a383e688949cc7f747dd584fa (diff) | |
download | gnuradio-4cd06fadac972d4cac559c15488bc39823063afe.tar.gz gnuradio-4cd06fadac972d4cac559c15488bc39823063afe.tar.bz2 gnuradio-4cd06fadac972d4cac559c15488bc39823063afe.zip |
Merge remote branch 'gnuradio/next'
* gnuradio/next: (806 commits)
gruel: added missing ignores
gruel: fixed swig interface file to dereference pmt_t.
qtgui: fix distcheck error
gruel: fixing structure. Passes make check.
gruel: SWIGing Gruel into Python to access PMTs.
gnuradio-examples: add C++ audio examples using new gr-audio
created gruel/attributes.h to house compiler specific attribute macros
audio: remove obsoleted individual top-level components
gr-audio: added README and default config fix
volk: simplify the get new method for the aligned pool
grc: moved all usrp1 and usrp2 stuff out of grc and into gr-usrp*/grc
grc: swap store the subprocess object rather than the pid when executing
qtgui: removed python directory that was added, never used
uhd: use %ignore to hide warnings and fix errors
Added/updated ignore files.
Fixing gr_filter_design program to import from gnuradio Python package.
audio: high prio for platform specific audio osx
audio: added windows and osx audio source files
audio: added config checks for other audios, added jack and port
audio: make prefs look like old audio, removed old audio.py
...
Diffstat (limited to 'volk/lib/qa_16s_branch_4_state_8_aligned16.cc')
-rw-r--r-- | volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc new file mode 100644 index 000000000..62deffaeb --- /dev/null +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -0,0 +1,106 @@ +#include <volk/volk.h> +#include <qa_16s_branch_4_state_8_aligned16.h> +#include <cstdlib> +#include <ctime> + +//test for ssse3 + +#ifndef LV_HAVE_SSSE3 + +void qa_16s_branch_4_state_8_aligned16::t1() { + printf("ssse3 not available... no test performed\n"); +} + +#else + +void qa_16s_branch_4_state_8_aligned16::t1() { + const int num_iters = 1000000; + const int vlen = 32; + + static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03}; + static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01}; + static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f}; + static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d}; + static char* permuters[4] = {permute0, permute1, permute2, permute3}; + + unsigned int num_bytes = vlen << 1; + + volk_environment_init(); + clock_t start, end; + double total; + + short target[vlen] __attribute__ ((aligned (16))); + short target2[vlen] __attribute__ ((aligned (16))); + short target3[vlen] __attribute__ ((aligned (16))); + + short src0[vlen] __attribute__ ((aligned (16))); + short permute_indexes[vlen] __attribute__ ((aligned (16))) = { +7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; + short cntl0[vlen] __attribute__ ((aligned (16))) = { + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; + short cntl1[vlen] __attribute__ ((aligned (16))) = { + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; + short cntl2[vlen] __attribute__ ((aligned (16))) = { + 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; + short cntl3[vlen] __attribute__ ((aligned (16))) = { + 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; + short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + + + + for(int i = 0; i < vlen; ++i) { + src0[i] = i; + + } + + + printf("16s_branch_4_state_8_aligned\n"); + + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("permute_and_scalar_add_time: %f\n", total); + + + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("branch_4_state_8_time, ssse3: %f\n", total); + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("permute_and_scalar_add_time, generic: %f\n", total); + + + + for(int i = 0; i < vlen; ++i) { + printf("psa... %d, b4s8... %d\n", target[i], target3[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT(target[i] == target2[i]); + CPPUNIT_ASSERT(target[i] == target3[i]); + } +} + + +#endif |