summaryrefslogtreecommitdiff
path: root/volk
diff options
context:
space:
mode:
Diffstat (limited to 'volk')
-rw-r--r--volk/config/orc.m42
-rw-r--r--volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h14
-rw-r--r--volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h15
-rw-r--r--volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h13
-rw-r--r--volk/include/volk/volk_16sc_magnitude_16s_aligned16.h6
-rw-r--r--volk/include/volk/volk_16sc_magnitude_32f_aligned16.h2
-rw-r--r--volk/include/volk/volk_32f_max_aligned16.h14
-rw-r--r--volk/include/volk/volk_32f_min_aligned16.h14
-rw-r--r--volk/lib/qa_16sc_deinterleave_16s_aligned16.cc12
-rw-r--r--volk/lib/qa_16sc_deinterleave_32f_aligned16.cc11
-rw-r--r--volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc9
-rw-r--r--volk/lib/qa_16sc_magnitude_16s_aligned16.cc5
-rw-r--r--volk/lib/qa_16sc_magnitude_32f_aligned16.cc6
-rw-r--r--volk/lib/qa_32f_max_aligned16.cc9
-rw-r--r--volk/lib/qa_32f_min_aligned16.cc9
-rw-r--r--volk/lib/qa_32fc_magnitude_16s_aligned16.cc8
-rw-r--r--volk/lib/qa_volk.cc1
-rw-r--r--volk/orc/Makefile.am10
-rw-r--r--volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc12
-rw-r--r--volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc6
-rw-r--r--volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc14
-rw-r--r--volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc8
-rw-r--r--volk/orc/volk_32f_max_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32f_min_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc24
-rw-r--r--volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc22
-rw-r--r--volk/volk.pc.in2
28 files changed, 202 insertions, 61 deletions
diff --git a/volk/config/orc.m4 b/volk/config/orc.m4
index 9645661b0..a4653400c 100644
--- a/volk/config/orc.m4
+++ b/volk/config/orc.m4
@@ -5,7 +5,7 @@ dnl ORC_CHECK([REQUIRED_VERSION])
AC_DEFUN([ORC_CHECK],
[
- ORC_REQ=ifelse([$1], , "0.4.6", [$1])
+ ORC_REQ=ifelse([$1], , "0.4.10", [$1])
enable_orc = auto
if test "x$enable_orc" != "xno" ; then
diff --git a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
index 32e13df98..cf94a3f38 100644
--- a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
+++ b/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
@@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Deinterleaves the complex 16 bit vector into I & Q vector data by forwarding to the externally defined ORC kernel
+ \param iBuffer The I buffer output data
+ \param qBuffer The Q buffer output data
+ \param complexVector The complex input vector
+ \param num_points The number of complex data values to be deinterleaved
+*/
+extern void volk_16sc_deinterleave_16s_aligned16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16sc_deinterleave_16s_aligned16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+ volk_16sc_deinterleave_16s_aligned16_orc_impl(iBuffer, qBuffer, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
index 86f67437d..50b8b62d5 100644
--- a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
+++ b/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
@@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer,
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+ /*!
+ \brief Converts the complex 16 bit vector into floats, scales each data point, and deinterleaves into I & Q vector data (forwards to the externally defined ORC kernel)
+ \param iBuffer The I buffer output data
+ \param qBuffer The Q buffer output data
+ \param complexVector The complex input vector
+ \param scalar The value that each converted input data value is divided by
+ \param num_points The number of complex data values to be deinterleaved
+ */
+extern void volk_16sc_deinterleave_32f_aligned16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16sc_deinterleave_32f_aligned16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+ volk_16sc_deinterleave_32f_aligned16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
index c0d1e941a..2dd85a422 100644
--- a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
+++ b/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
@@ -77,7 +77,18 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuf
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data only (no Q output) by forwarding to the externally defined ORC kernel
+ \param iBuffer The I buffer output data
+ \param complexVector The complex input vector
+ \param num_points The number of complex data values to be deinterleaved
+*/
+extern void volk_16sc_deinterleave_real_8s_aligned16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16sc_deinterleave_real_8s_aligned16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+ volk_16sc_deinterleave_real_8s_aligned16_orc_impl(iBuffer, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
index 9f3222aa6..41e8751d6 100644
--- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
+++ b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
@@ -173,16 +173,16 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
}
#endif /* LV_HAVE_GENERIC */
-#if LV_HAVE_ORC
+#if LV_HAVE_ORC_DISABLED
/*!
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
-extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points);
+extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
static inline void volk_16sc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
- volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, num_points);
+ volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
}
#endif /* LV_HAVE_ORC */
diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
index e063ae432..c2605d551 100644
--- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
+++ b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
@@ -161,7 +161,7 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec
}
#endif /* LV_HAVE_GENERIC */
-#if LV_HAVE_ORC
+#if LV_HAVE_ORC_DISABLED
/*!
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
diff --git a/volk/include/volk/volk_32f_max_aligned16.h b/volk/include/volk/volk_32f_max_aligned16.h
index 96aafb2bf..d4e30fba8 100644
--- a/volk/include/volk/volk_32f_max_aligned16.h
+++ b/volk/include/volk/volk_32f_max_aligned16.h
@@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Selects the maximum of each pair of corresponding entries in aVector and bVector and stores the result in cVector (forwards to the externally defined ORC kernel)
+ \param cVector The vector where the results will be stored
+ \param aVector The first input vector to be compared
+ \param bVector The second input vector to be compared
+ \param num_points The number of values in aVector and bVector to be compared and stored into cVector
+*/
+extern void volk_32f_max_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_max_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_max_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32f_min_aligned16.h b/volk/include/volk/volk_32f_min_aligned16.h
index e247f4213..55daafb6a 100644
--- a/volk/include/volk/volk_32f_min_aligned16.h
+++ b/volk/include/volk/volk_32f_min_aligned16.h
@@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Selects the minimum of each pair of corresponding entries in aVector and bVector and stores the result in cVector (forwards to the externally defined ORC kernel)
+ \param cVector The vector where the results will be stored
+ \param aVector The first input vector to be compared
+ \param bVector The second input vector to be compared
+ \param num_points The number of values in aVector and bVector to be compared and stored into cVector
+*/
+extern void volk_32f_min_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_min_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_min_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
#endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
index e700ac72c..7e9e31df5 100644
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
@@ -44,6 +46,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
}
end = clock();
@@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]);
+
+ CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
+ CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]);
}
}
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
index 6ee076998..45100206d 100644
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
float output_generic1[vlen] __attribute__ ((aligned (16)));
float output_sse2[vlen] __attribute__ ((aligned (16)));
float output_sse21[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
+ float output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
@@ -42,6 +44,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
}
end = clock();
@@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4);
}
}
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
index 5ab458bc9..d187d20c3 100644
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
int8_t output_generic[vlen] __attribute__ ((aligned (16)));
int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+ int8_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
@@ -40,6 +41,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+ CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index c8f13ff84..dd4ae75ff 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
- start = clock();
+/* start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
+*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
- CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+ //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index e7178863c..53d42e28c 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
- start = clock();
+/* start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("orc_time: %f\n", total);
-
+*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
- CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
index 3ef375176..cb1fd3627 100644
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -41,6 +42,13 @@ void qa_32f_max_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
index 617e18b24..bf453f360 100644
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -41,6 +42,13 @@ void qa_32f_min_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index c3e65866b..105d32d0c 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);
- for(int i = 0; i < 1; ++i) {
- //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
- //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
- }
+ //for(int i = 0; i < 10; ++i) {
+ // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+ // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+ //}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b..f6a334da7 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -118,7 +118,6 @@ CppUnit::TestSuite *
qa_volk::suite()
{
CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
s->addTest(qa_16s_quad_max_star_aligned16::suite());
s->addTest(qa_32fc_dot_prod_aligned16::suite());
s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am
index 3f105fd80..797efee18 100644
--- a/volk/orc/Makefile.am
+++ b/volk/orc/Makefile.am
@@ -35,13 +35,17 @@ volk_32f_subtract_aligned16_orc_impl.orc \
volk_32f_divide_aligned16_orc_impl.orc \
volk_32f_multiply_aligned16_orc_impl.orc \
volk_32f_sqrt_aligned16_orc_impl.orc \
-volk_16sc_magnitude_32f_aligned16_orc_impl.orc \
+volk_32f_max_aligned16_orc_impl.orc \
+volk_32f_min_aligned16_orc_impl.orc \
volk_32fc_magnitude_32f_aligned16_orc_impl.orc \
-volk_32fc_magnitude_16s_aligned16_orc_impl.orc
+volk_32fc_magnitude_16s_aligned16_orc_impl.orc \
+volk_16sc_deinterleave_16s_aligned16_orc_impl.orc \
+volk_16sc_deinterleave_32f_aligned16_orc_impl.orc \
+volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
-my_ORCC_FLAGS = --implementation --lazy-init $(ORCC_FLAGS)
+my_ORCC_FLAGS = --implementation $(ORCC_FLAGS)
.orc.c:
$(ORCC) $(my_ORCC_FLAGS) -o $@ $<
diff --git a/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..d226064a7
--- /dev/null
+++ b/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_16sc_deinterleave_16s_aligned16_orc_impl
+.dest 2 idst
+.dest 2 qdst
+.source 4 src
+splitlw qdst, idst, src
diff --git a/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc
new file mode 100644
index 000000000..dddf682ca
--- /dev/null
+++ b/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc
@@ -0,0 +1,12 @@
+.function volk_16sc_deinterleave_32f_aligned16_orc_impl
+.dest 4 idst
+.dest 4 qdst
+.source 4 src
+.floatparam 4 scalar
+.temp 8 iql
+.temp 8 iqf
+
+x2 convswl iql, src
+x2 convlf iqf, iql
+x2 divf iqf, iqf, scalar
+splitql qdst, idst, iqf
diff --git a/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..609750096
--- /dev/null
+++ b/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
@@ -0,0 +1,6 @@
+.function volk_16sc_deinterleave_real_8s_aligned16_orc_impl
+.dest 1 dst
+.source 4 src
+.temp 2 iw
+select0lw iw, src
+convhwb dst, iw
diff --git a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
index f6c959c00..83b867dca 100644
--- a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
+++ b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
@@ -1,6 +1,7 @@
.function volk_16sc_magnitude_16s_aligned16_orc_impl
.source 4 src
.dest 2 dst
+.floatparam 4 scalar
.temp 2 reals
.temp 2 imags
.temp 4 reall
@@ -12,13 +13,16 @@
.temp 4 rootl
splitlw reals, imags, src
-convwl reall, reals
-convwl imagl, imags
+convswl reall, reals
+convswl imagl, imags
convlf realf, reall
convlf imagf, imagl
-mulf realf, realf, (1.0 / 32768.0)
-mulf imagf, imagf, (1.0 / 32768.0)
+divf realf, realf, scalar
+divf imagf, imagf, scalar
+mulf realf, realf, realf
+mulf imagf, imagf, imagf
addf sumf, realf, imagf
sqrtf rootf, sumf
+mulf rootf, rootf, scalar
convfl rootl, rootf
-conflw dst, rootl
+convlw dst, rootl
diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
index 44654ad8e..6d2ed8197 100644
--- a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
+++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
@@ -2,7 +2,6 @@
.source 4 src
.dest 4 dst
.floatparam 4 scalar
-.temp 4 invscalar
.temp 4 reall
.temp 4 imagl
.temp 2 reals
@@ -11,14 +10,15 @@
.temp 4 imagf
.temp 4 sumf
-divf invscalar, 1.0, scalar
+
+
splitlw reals, imags, src
convswl reall, reals
convswl imagl, imags
convlf realf, reall
convlf imagf, imagl
-mulf realf, realf, invscalar
-mulf imagf, imagf, invscalar
+divf realf, realf, scalar
+divf imagf, imagf, scalar
mulf realf, realf, realf
mulf imagf, imagf, imagf
addf sumf, realf, imagf
diff --git a/volk/orc/volk_32f_max_aligned16_orc_impl.orc b/volk/orc/volk_32f_max_aligned16_orc_impl.orc
new file mode 100644
index 000000000..97f48ba4a
--- /dev/null
+++ b/volk/orc/volk_32f_max_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_max_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+maxf dst, src1, src2
diff --git a/volk/orc/volk_32f_min_aligned16_orc_impl.orc b/volk/orc/volk_32f_min_aligned16_orc_impl.orc
new file mode 100644
index 000000000..a597933de
--- /dev/null
+++ b/volk/orc/volk_32f_min_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_min_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+minf dst, src1, src2
diff --git a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
index db8405e59..f71dd9a37 100644
--- a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
+++ b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
@@ -2,24 +2,18 @@
.source 8 src
.dest 2 dst
.floatparam 4 scalar
-.temp 4 invscalar
-.temp 4 reall
-.temp 4 imagl
-.temp 4 realf
-.temp 4 imagf
+.temp 8 iqf
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
.temp 4 sumf
.temp 4 rootf
.temp 4 rootl
-divf invscalar, 1.0, scalar
-splitql reall, imagl, src
-convlf realf, reall
-convlf imagf, imagl
-mulf realf, realf, invscalar
-mulf imagf, imagf, invscalar
-mulf realf, realf, realf
-mulf imagf, imagf, imagf
-addf sumf, realf, imagf
+x2 mulf prodiqf, src, src
+splitql qf, if, prodiqf
+addf sumf, if, qf
sqrtf rootf, sumf
+mulf rootf, rootf, scalar
convfl rootl, rootf
-convlw dst, rootl
+convssslw dst, rootl
diff --git a/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc
index cc5c85b45..47a10531d 100644
--- a/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc
+++ b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc
@@ -1,21 +1,13 @@
.function volk_32fc_magnitude_32f_aligned16_orc_impl
.source 8 src
.dest 4 dst
-.floatparam 4 scalar
-.temp 4 invscalar
-.temp 4 reall
-.temp 4 imagl
-.temp 4 realf
-.temp 4 imagf
+.temp 8 iqf
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
.temp 4 sumf
-divf invscalar, 1.0, scalar
-splitql reall, imagl, src
-convlf realf, reall
-convlf imagf, imagl
-mulf realf, realf, invscalar
-mulf imagf, imagf, invscalar
-mulf realf, realf, realf
-mulf imagf, imagf, imagf
-addf sumf, realf, imagf
+x2 mulf prodiqf, src, src
+splitql qf, if, prodiqf
+addf sumf, if, qf
sqrtf dst, sumf
diff --git a/volk/volk.pc.in b/volk/volk.pc.in
index a24298856..b03dbdada 100644
--- a/volk/volk.pc.in
+++ b/volk/volk.pc.in
@@ -10,6 +10,6 @@ Name: volk
Description: VOLK.. Vector Optimized Library of Kernels
Requires:
Version: @VERSION@
-Libs: -lvolk -lvolk_runtime
+Libs: -lvolk -lvolk_runtime -lvolk_orc
Cflags: -I${includedir} ${LV_CXXFLAGS}