summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 6
-rw-r--r-- volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8
-rw-r--r-- volk/orc/Makefile.am | 1
-rw-r--r-- volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc | 27
-rw-r--r-- volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc | 2
5 files changed, 20 insertions, 24 deletions
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index dd4ae75ff..d00315b57 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,14 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
-/* start = clock();
+ start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
-*/
+
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -73,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
- //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index 105d32d0c..53b3bf790 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);
- //for(int i = 0; i < 10; ++i) {
- // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
- // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
- //}
+ for(int i = 0; i < 10; ++i) {
+ printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+ printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+ }
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am
index 797efee18..a469638c1 100644
--- a/volk/orc/Makefile.am
+++ b/volk/orc/Makefile.am
@@ -39,6 +39,7 @@ volk_32f_max_aligned16_orc_impl.orc \
volk_32f_min_aligned16_orc_impl.orc \
volk_32fc_magnitude_32f_aligned16_orc_impl.orc \
volk_32fc_magnitude_16s_aligned16_orc_impl.orc \
+volk_16sc_magnitude_16s_aligned16_orc_impl.orc \
volk_16sc_deinterleave_16s_aligned16_orc_impl.orc \
volk_16sc_deinterleave_32f_aligned16_orc_impl.orc \
volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
diff --git a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
index 83b867dca..088f56312 100644
--- a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
+++ b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
@@ -2,26 +2,21 @@
.source 4 src
.dest 2 dst
.floatparam 4 scalar
-.temp 2 reals
-.temp 2 imags
-.temp 4 reall
-.temp 4 imagl
-.temp 4 realf
-.temp 4 imagf
+.temp 8 iql
+.temp 8 iqf
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
.temp 4 sumf
.temp 4 rootf
.temp 4 rootl
-splitlw reals, imags, src
-convswl reall, reals
-convswl imagl, imags
-convlf realf, reall
-convlf imagf, imagl
-divf realf, realf, scalar
-divf imagf, imagf, scalar
-mulf realf, realf, realf
-mulf imagf, imagf, imagf
-addf sumf, realf, imagf
+x2 convswl iql, src
+x2 convlf iqf, iql
+x2 divf iqf, iqf, scalar
+x2 mulf prodiqf, iqf, iqf
+splitql qf, if, prodiqf
+addf sumf, if, qf
sqrtf rootf, sumf
mulf rootf, rootf, scalar
convfl rootl, rootf
diff --git a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
index f71dd9a37..48b831021 100644
--- a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
+++ b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
@@ -16,4 +16,4 @@ addf sumf, if, qf
sqrtf rootf, sumf
mulf rootf, rootf, scalar
convfl rootl, rootf
-convssslw dst, rootl
+convlw dst, rootl