diff options
-rw-r--r-- | volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 6 | ||||
-rw-r--r-- | volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 | ||||
-rw-r--r-- | volk/orc/Makefile.am | 1 | ||||
-rw-r--r-- | volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc | 27 | ||||
-rw-r--r-- | volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc | 2 |
5 files changed, 20 insertions, 24 deletions
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index dd4ae75ff..d00315b57 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -40,14 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); -/* start = clock(); + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); } end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("orc_time: %f\n", total); -*/ + start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); @@ -73,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); - //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 105d32d0c..53b3bf790 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("sse3_time: %f\n", total); - //for(int i = 0; i < 10; ++i) { - // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); - // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); - //} + for(int i = 0; i < 10; ++i) { + printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); + } for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am index 797efee18..a469638c1 100644 --- a/volk/orc/Makefile.am +++ b/volk/orc/Makefile.am @@ -39,6 +39,7 @@ volk_32f_max_aligned16_orc_impl.orc \ volk_32f_min_aligned16_orc_impl.orc \ volk_32fc_magnitude_32f_aligned16_orc_impl.orc \ volk_32fc_magnitude_16s_aligned16_orc_impl.orc \ +volk_16sc_magnitude_16s_aligned16_orc_impl.orc \ volk_16sc_deinterleave_16s_aligned16_orc_impl.orc \ volk_16sc_deinterleave_32f_aligned16_orc_impl.orc \ volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc diff --git a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc index 83b867dca..088f56312 100644 --- a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc +++ b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc @@ -2,26 +2,21 @@ .source 4 src .dest 2 dst .floatparam 4 scalar -.temp 2 reals -.temp 2 imags -.temp 4 reall -.temp 4 imagl -.temp 4 realf -.temp 4 imagf +.temp 8 iql +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if .temp 4 sumf .temp 4 rootf .temp 4 rootl -splitlw reals, imags, src -convswl reall, reals -convswl imagl, imags -convlf realf, reall -convlf imagf, imagl -divf realf, realf, scalar -divf imagf, imagf, scalar -mulf realf, realf, realf -mulf imagf, imagf, imagf -addf sumf, realf, imagf +x2 convswl iql, src +x2 convlf iqf, iql +x2 divf iqf, iqf, scalar +x2 mulf prodiqf, iqf, iqf +splitql qf, if, prodiqf +addf sumf, if, qf sqrtf rootf, sumf mulf rootf, rootf, scalar convfl rootl, rootf diff --git a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc index f71dd9a37..48b831021 100644 --- a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc +++ b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc @@ -16,4 +16,4 @@ addf sumf, if, qf sqrtf rootf, sumf mulf rootf, rootf, scalar convfl rootl, rootf -convssslw dst, rootl +convlw dst, rootl |