summary | refs | log | tree | commit | diff
path: root/volk/lib
diff options
context:
space:
mode:
author: Nick Foster, 2010-12-15 16:27:42 -0800
committer: Nick Foster, 2010-12-15 16:27:42 -0800
commit: 15ad4b5398e474bfb52fdb7e826b69f3e398c0b0 (patch)
tree: 1258d6349de630194341bdaab76628a9f9cc9ec2 /volk/lib
parent: f9ee6a55cb397f9302769a25a8c959fa162354f0 (diff)
download: gnuradio-15ad4b5398e474bfb52fdb7e826b69f3e398c0b0.tar.gz
gnuradio-15ad4b5398e474bfb52fdb7e826b69f3e398c0b0.tar.bz2
gnuradio-15ad4b5398e474bfb52fdb7e826b69f3e398c0b0.zip
Volk: A bunch of new ORC routines plus tests.
Also fixed a typo in the generic version of 16sc_magnitude_16s_a16.
Diffstat (limited to 'volk/lib')
-rw-r--r-- volk/lib/qa_16sc_magnitude_16s_aligned16.cc 9
-rw-r--r-- volk/lib/qa_16sc_magnitude_32f_aligned16.cc 20
-rw-r--r-- volk/lib/qa_32f_divide_aligned16.cc 9
-rw-r--r-- volk/lib/qa_32fc_magnitude_16s_aligned16.cc 9
-rw-r--r-- volk/lib/qa_32fc_magnitude_32f_aligned16.cc 9
-rw-r--r-- volk/lib/qa_32s_or_aligned16.cc 9
6 files changed, 65 insertions, 0 deletions
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index b14610757..c8f13ff84 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -23,6 +23,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t output_sse[vlen] __attribute__ ((aligned (16)));
int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -41,6 +42,13 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
}
end = clock();
@@ -64,6 +72,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index 2c9e48f6e..e7178863c 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -15,6 +15,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
float output_generic[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
float output_known[vlen] __attribute__ ((aligned (16)));
int16_t* inputLoad = (int16_t*)input0;
@@ -37,6 +38,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
+
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
/*
for(int i = 0; i < 100; ++i) {
@@ -48,6 +57,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4);
}
}
@@ -63,6 +73,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
float output_generic[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
float output_sse[vlen] __attribute__ ((aligned (16)));
float output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -81,6 +92,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
}
end = clock();
@@ -104,6 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index f104e0443..b2c2ecf9a 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -88,6 +88,7 @@ void qa_32f_divide_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -104,6 +105,13 @@ void qa_32f_divide_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -117,6 +125,7 @@ void qa_32f_divide_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index a4be1616b..c3e65866b 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -23,6 +23,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t output_sse[vlen] __attribute__ ((aligned (16)));
int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -41,6 +42,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
}
end = clock();
@@ -64,6 +72,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
index d69ada408..6a1d46c7a 100644
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
@@ -23,6 +23,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
float output_generic[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
float output_sse[vlen] __attribute__ ((aligned (16)));
float output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -41,6 +42,13 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
}
end = clock();
@@ -64,6 +72,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
index e09dfb91c..9ea5283a6 100644
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ b/volk/lib/qa_32s_or_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32s_or_aligned16::t1() {
int32_t output0[vlen] __attribute__ ((aligned (16)));
int32_t output01[vlen] __attribute__ ((aligned (16)));
+ int32_t output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -41,6 +42,13 @@ void qa_32s_or_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_32s_or_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}