summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--volk/Makefile.am11
-rw-r--r--volk/config/Makefile.am1
-rw-r--r--volk/config/lv_configure.m43
-rw-r--r--volk/config/orc.m461
-rw-r--r--volk/configure.ac4
-rw-r--r--volk/include/volk/Makefile.am2
-rw-r--r--volk/include/volk/archs.xml6
-rw-r--r--volk/include/volk/make_set_simd.py56
-rw-r--r--volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h14
-rw-r--r--volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h15
-rw-r--r--volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h13
-rw-r--r--volk/include/volk/volk_16sc_magnitude_16s_aligned16.h15
-rw-r--r--volk/include/volk/volk_16sc_magnitude_32f_aligned16.h14
-rw-r--r--volk/include/volk/volk_16u_byteswap_aligned16.h12
-rw-r--r--volk/include/volk/volk_32f_add_aligned16.h14
-rw-r--r--volk/include/volk/volk_32f_divide_aligned16.h13
-rw-r--r--volk/include/volk/volk_32f_max_aligned16.h14
-rw-r--r--volk/include/volk/volk_32f_min_aligned16.h14
-rw-r--r--volk/include/volk/volk_32f_multiply_aligned16.h14
-rw-r--r--volk/include/volk/volk_32f_sqrt_aligned16.h13
-rw-r--r--volk/include/volk/volk_32f_subtract_aligned16.h14
-rw-r--r--volk/include/volk/volk_32fc_magnitude_16s_aligned16.h14
-rw-r--r--volk/include/volk/volk_32fc_magnitude_32f_aligned16.h13
-rw-r--r--volk/include/volk/volk_32s_and_aligned16.h14
-rw-r--r--volk/include/volk/volk_32s_or_aligned16.h14
-rw-r--r--volk/include/volk/volk_8s_convert_16s_aligned16.h12
-rw-r--r--volk/include/volk/volk_8s_convert_32f_aligned16.h13
-rw-r--r--volk/lib/Makefile.am25
-rw-r--r--volk/lib/qa_16sc_deinterleave_16s_aligned16.cc12
-rw-r--r--volk/lib/qa_16sc_deinterleave_32f_aligned16.cc11
-rw-r--r--volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc9
-rw-r--r--volk/lib/qa_16sc_magnitude_16s_aligned16.cc10
-rw-r--r--volk/lib/qa_16sc_magnitude_32f_aligned16.cc20
-rw-r--r--volk/lib/qa_16u_byteswap_aligned16.cc10
-rw-r--r--volk/lib/qa_32f_add_aligned16.cc9
-rw-r--r--volk/lib/qa_32f_divide_aligned16.cc19
-rw-r--r--volk/lib/qa_32f_max_aligned16.cc9
-rw-r--r--volk/lib/qa_32f_min_aligned16.cc9
-rw-r--r--volk/lib/qa_32f_multiply_aligned16.cc9
-rw-r--r--volk/lib/qa_32f_sqrt_aligned16.cc15
-rw-r--r--volk/lib/qa_32f_subtract_aligned16.cc9
-rw-r--r--volk/lib/qa_32fc_magnitude_16s_aligned16.cc13
-rw-r--r--volk/lib/qa_32fc_magnitude_32f_aligned16.cc9
-rw-r--r--volk/lib/qa_32s_and_aligned16.cc9
-rw-r--r--volk/lib/qa_32s_or_aligned16.cc9
-rw-r--r--volk/lib/qa_8s_convert_32f_aligned16.cc8
-rw-r--r--volk/orc/Makefile.am52
-rw-r--r--volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc12
-rw-r--r--volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc6
-rw-r--r--volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc23
-rw-r--r--volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc25
-rw-r--r--volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc3
-rw-r--r--volk/orc/volk_32f_add_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32f_divide_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32f_max_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32f_min_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32f_multiply_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc4
-rw-r--r--volk/orc/volk_32f_subtract_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc23
-rw-r--r--volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc13
-rw-r--r--volk/orc/volk_32s_and_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_32s_or_aligned16_orc_impl.orc5
-rw-r--r--volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc4
-rw-r--r--volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc9
-rw-r--r--volk/volk.pc.in2
67 files changed, 820 insertions, 28 deletions
diff --git a/volk/Makefile.am b/volk/Makefile.am
index 3521dd0e4..271d495cd 100644
--- a/volk/Makefile.am
+++ b/volk/Makefile.am
@@ -24,7 +24,12 @@ ACLOCAL_AMFLAGS = -I config
include $(top_srcdir)/Makefile.common
EXTRA_DIST = bootstrap configure config.h.in volk_config.h
-SUBDIRS = config include lib
+SUBDIRS = config
+if LV_HAVE_ORC
+SUBDIRS += orc
+endif
+SUBDIRS += include lib
+
#if USE_PYTHON
#SUBDIRS += python
#endif
@@ -53,4 +58,6 @@ distclean-local:
-rm -f include/Makefile.in
-rm -f lib/Makefile.in
-rm -f python/Makefile.in
- -rm -f configure \ No newline at end of file
+ -rm -f configure
+ -rm -f orc/Makefile.in
+ -rm -f orc/*.c
diff --git a/volk/config/Makefile.am b/volk/config/Makefile.am
index 0e556c6e2..1d0041e35 100644
--- a/volk/config/Makefile.am
+++ b/volk/config/Makefile.am
@@ -45,6 +45,7 @@ m4macros = \
mkstemp.m4 \
onceonly.m4 \
pkg.m4 \
+ orc.m4 \
gcc_version_workaround.m4
diff --git a/volk/config/lv_configure.m4 b/volk/config/lv_configure.m4
index c7a5fe960..f98b2dc5b 100644
--- a/volk/config/lv_configure.m4
+++ b/volk/config/lv_configure.m4
@@ -100,6 +100,9 @@ dnl AM_CONDITIONAL([USE_PYTHON], [test "$with_python" = yes])
GR_PWIN32
GR_LIBGNURADIO_CORE_EXTRA_LDFLAGS
+ dnl Check for liborc
+ ORC_CHECK
+
LDFLAGS="$LDFLAGS $LIBGNURADIO_CORE_EXTRA_LDFLAGS"
AC_CHECK_PROG([XMLTO],[xmlto],[yes],[])
diff --git a/volk/config/orc.m4 b/volk/config/orc.m4
new file mode 100644
index 000000000..a4653400c
--- /dev/null
+++ b/volk/config/orc.m4
@@ -0,0 +1,61 @@
+dnl pkg-config-based checks for Orc
+
+dnl specific:
+dnl ORC_CHECK([REQUIRED_VERSION])
+
+AC_DEFUN([ORC_CHECK],
+[
+  ORC_REQ=ifelse([$1], , "0.4.10", [$1])
+  dnl Keep a value supplied through --enable-orc or --disable-orc; otherwise autodetect.
+  test -n "$enable_orc" || enable_orc=auto
+  if test "x$enable_orc" != "xno" ; then
+    PKG_CHECK_MODULES(ORC, orc-0.4 >= $ORC_REQ, [
+      AC_DEFINE(HAVE_ORC, 1, [Use Orc])
+      if test "x$ORCC" = "x" ; then
+        ORCC=`$PKG_CONFIG --variable=orcc orc-0.4`
+      fi
+      AC_SUBST(ORCC)
+      ORCC_FLAGS="--compat $ORC_REQ"
+      ORC_LDFLAGS=`$PKG_CONFIG --libs orc-0.4`
+      ORC_CFLAGS=`$PKG_CONFIG --cflags orc-0.4`
+      AC_SUBST(ORCC_FLAGS)
+      AC_SUBST(ORC_LDFLAGS)
+      AC_SUBST(ORC_CFLAGS)
+      HAVE_ORC=yes
+      HAVE_ORCC=yes
+      if test "x$cross_compiling" = "xyes" ; then
+        HAVE_ORCC=no
+      fi
+    ], [
+      if test "x$enable_orc" = "xyes" ; then
+        AC_MSG_ERROR([--enable-orc specified, but Orc >= $ORC_REQ not found])
+      fi
+      AC_DEFINE(DISABLE_ORC, 1, [Disable Orc])
+      HAVE_ORC=no
+      HAVE_ORCC=no
+    ])
+  else
+    AC_DEFINE(DISABLE_ORC, 1, [Disable Orc])
+    HAVE_ORC=no
+    HAVE_ORCC=no
+  fi
+  AM_CONDITIONAL(HAVE_ORC, [test "x$HAVE_ORC" = "xyes"])
+  AM_CONDITIONAL(HAVE_ORCC, [test "x$HAVE_ORCC" = "xyes"])
+  dnl Both conditionals above are defined on every path through this macro.
+])
+
+dnl ORC_OUTPUT: print a short summary of the Orc decision recorded in HAVE_ORC.
+AC_DEFUN([ORC_OUTPUT],
+[
+  if test "$HAVE_ORC" = yes ; then
+    printf "configure: *** Orc acceleration enabled.\n"
+  elif test "x$enable_orc" = "xno" ; then
+    printf "configure: *** Orc acceleration disabled by --disable-orc.\n"
+  else
+    printf "configure: *** Orc acceleration disabled. Requires Orc >= $ORC_REQ, which was\n"
+    printf " not found.\n"
+  fi
+  dnl Trailing blank line separates the summary from later configure output.
+  printf "\n"
+])
+
diff --git a/volk/configure.ac b/volk/configure.ac
index 8f17e5065..5a1eac3f2 100644
--- a/volk/configure.ac
+++ b/volk/configure.ac
@@ -78,6 +78,10 @@ AC_CONFIG_FILES([\
lib/Makefile \
volk.pc \
])
+
+dnl Always generate orc/Makefile: SUBDIRS is gated on LV_HAVE_ORC, not HAVE_ORC,
+dnl and automake recurses into every DIST_SUBDIRS entry for dist and distclean.
+AC_CONFIG_FILES([orc/Makefile])
AC_OUTPUT
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index 04a43bd34..99276ab87 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -179,4 +179,4 @@ distclean-local:
rm -f Makefile.in
rm -f volk_environment_init.h
rm -f volk_mktables
- rm -f $(BUILT_SOURCES) \ No newline at end of file
+ rm -f $(BUILT_SOURCES)
diff --git a/volk/include/volk/archs.xml b/volk/include/volk/archs.xml
index b7c98500f..a19a5add9 100644
--- a/volk/include/volk/archs.xml
+++ b/volk/include/volk/archs.xml
@@ -5,6 +5,12 @@
<flag>none</flag>
</arch>
+<arch name="orc" type="all">
+ <flag>lorc-0.4</flag>
+ <overrule>LV_HAVE_ORC</overrule>
+ <overrule_val>no</overrule_val>
+</arch>
+
<arch name="altivec" type="powerpc">
<flag>maltivec</flag>
</arch>
diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py
index 842366b18..f2b7c0656 100644
--- a/volk/include/volk/make_set_simd.py
+++ b/volk/include/volk/make_set_simd.py
@@ -110,7 +110,7 @@ def make_set_simd(dom) :
arch = str(domarch.attributes["name"].value);
tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n";
tempstring = tempstring + " ADDONS=\"\"\n";
- tempstring = tempstring + " BUILT_ARCHS=\"generic\"\n";
+ tempstring = tempstring + " BUILT_ARCHS=\"\"\n";
tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n";
tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n";
tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n";
@@ -180,8 +180,22 @@ def make_set_simd(dom) :
tempstring = tempstring + " indCXX=no\n"
tempstring = tempstring + " indLV_ARCH=no\n"
elif atype == "all":
+ tempstring = tempstring + " for i in $cf_with_lv_arch\n"
+ tempstring = tempstring + " do\n"
+ tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n";
+ tempstring = tempstring + " indLV_ARCH=yes\n"
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " done\n"
+ tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+ tempstring = tempstring + " indLV_ARCH=no\n"
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n"
tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n";
tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n";
+ tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " indLV_ARCH=no\n"
+
tempstring = tempstring + " ;;\n"
tempstring = tempstring + " (powerpc)\n"
@@ -225,14 +239,50 @@ def make_set_simd(dom) :
tempstring = tempstring + " indCXX=no\n"
tempstring = tempstring + " indLV_ARCH=no\n"
elif atype == "all":
+ tempstring = tempstring + " for i in $cf_with_lv_arch\n"
+ tempstring = tempstring + " do\n"
+ tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n";
+ tempstring = tempstring + " indLV_ARCH=yes\n"
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " done\n"
+ tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+ tempstring = tempstring + " indLV_ARCH=no\n"
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n"
+ tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n";
+ tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n";
+ tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " indLV_ARCH=no\n"
+ tempstring = tempstring + " ;;\n"
+ tempstring = tempstring + " (*)\n"
+ for domarch in dom:
+ arch = str(domarch.attributes["name"].value);
+ atype = str(domarch.attributes["type"].value);
+ flag = domarch.getElementsByTagName("flag");
+ flag = str(flag[0].firstChild.data);
+ if atype == "all":
+ tempstring = tempstring + " for i in $cf_with_lv_arch\n"
+ tempstring = tempstring + " do\n"
+ tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n";
+ tempstring = tempstring + " indLV_ARCH=yes\n"
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " done\n"
+ tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n"
+ tempstring = tempstring + " indLV_ARCH=no\n"
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n"
tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n";
tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n";
+ tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n";
+ tempstring = tempstring + " fi\n"
+ tempstring = tempstring + " indLV_ARCH=no\n"
tempstring = tempstring + " ;;\n"
tempstring = tempstring + " esac\n"
tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n"
+ tempstring = tempstring + " AM_CONDITIONAL(LV_HAVE_ORC, [test \"$LV_HAVE_ORC\" = \"yes\"])\n";
tempstring = tempstring + "])\n"
return tempstring;
-
-
+
diff --git a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
index 32e13df98..cf94a3f38 100644
--- a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
+++ b/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h
@@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Deinterleaves the complex 16 bit vector into I & Q vector data
+ \param complexVector The complex input vector
+ \param iBuffer The I buffer output data
+ \param qBuffer The Q buffer output data
+ \param num_points The number of complex data values to be deinterleaved
+*/
+extern void volk_16sc_deinterleave_16s_aligned16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16sc_deinterleave_16s_aligned16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+ volk_16sc_deinterleave_16s_aligned16_orc_impl(iBuffer, qBuffer, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
index 86f67437d..50b8b62d5 100644
--- a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
+++ b/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h
@@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer,
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+ /*!
+ \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data
+ \param complexVector The complex input vector
+ \param iBuffer The I buffer output data
+ \param qBuffer The Q buffer output data
+ \param scalar The data value to be divided against each input data value of the input complex vector
+ \param num_points The number of complex data values to be deinterleaved
+ */
+extern void volk_16sc_deinterleave_32f_aligned16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16sc_deinterleave_32f_aligned16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+ volk_16sc_deinterleave_32f_aligned16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
index c0d1e941a..2dd85a422 100644
--- a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
+++ b/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
@@ -77,7 +77,18 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuf
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
+ \param complexVector The complex input vector
+ \param iBuffer The I buffer output data
+ \param num_points The number of complex data values to be deinterleaved
+*/
+extern void volk_16sc_deinterleave_real_8s_aligned16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
+static inline void volk_16sc_deinterleave_real_8s_aligned16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
+ volk_16sc_deinterleave_real_8s_aligned16_orc_impl(iBuffer, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
index 1482ab82e..41e8751d6 100644
--- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
+++ b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h
@@ -164,7 +164,7 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
const int16_t* complexVectorPtr = (const int16_t*)complexVector;
int16_t* magnitudeVectorPtr = magnitudeVector;
unsigned int number = 0;
- const float scalar = 32786.0;
+ const float scalar = 32768.0;
for(number = 0; number < num_points; number++){
float real = ((float)(*complexVectorPtr++)) / scalar;
float imag = ((float)(*complexVectorPtr++)) / scalar;
@@ -173,7 +173,18 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC_DISABLED
+/*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+*/
+extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
+static inline void volk_16sc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){
+ volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
+}
+#endif /* LV_HAVE_ORC_DISABLED */
#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
index 9c2a48835..c2605d551 100644
--- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
+++ b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h
@@ -161,7 +161,19 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC_DISABLED
+/*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param scalar The data value to be divided against each input data value of the input complex vector
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+*/
+extern void volk_16sc_magnitude_32f_aligned16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_16sc_magnitude_32f_aligned16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
+ volk_16sc_magnitude_32f_aligned16_orc_impl(magnitudeVector, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC_DISABLED */
#endif /* INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H */
diff --git a/volk/include/volk/volk_16u_byteswap_aligned16.h b/volk/include/volk/volk_16u_byteswap_aligned16.h
index 698e958e4..9d19d1a45 100644
--- a/volk/include/volk/volk_16u_byteswap_aligned16.h
+++ b/volk/include/volk/volk_16u_byteswap_aligned16.h
@@ -61,5 +61,17 @@ static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, uns
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Byteswaps (in-place) an aligned vector of uint16_t's.
+ \param intsToSwap The vector of data to byte swap
+ \param num_points The number of data points
+*/
+extern void volk_16u_byteswap_aligned16_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
+static inline void volk_16u_byteswap_aligned16_orc(uint16_t* intsToSwap, unsigned int num_points){
+ volk_16u_byteswap_aligned16_orc_impl(intsToSwap, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
#endif /* INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32f_add_aligned16.h b/volk/include/volk/volk_32f_add_aligned16.h
index 721c60fd6..e7d8de265 100644
--- a/volk/include/volk/volk_32f_add_aligned16.h
+++ b/volk/include/volk/volk_32f_add_aligned16.h
@@ -63,7 +63,19 @@ static inline void volk_32f_add_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+extern void volk_32f_add_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_add_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_add_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_32f_ADD_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32f_divide_aligned16.h b/volk/include/volk/volk_32f_divide_aligned16.h
index c00700cd8..c595b5e92 100644
--- a/volk/include/volk/volk_32f_divide_aligned16.h
+++ b/volk/include/volk/volk_32f_divide_aligned16.h
@@ -63,6 +63,19 @@ static inline void volk_32f_divide_aligned16_generic(float* cVector, const float
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Divides the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be divided
+ \param bVector The divisor vector
+ \param num_points The number of values in aVector and bVector to be divided and stored into cVector
+*/
+extern void volk_32f_divide_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_divide_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_divide_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
diff --git a/volk/include/volk/volk_32f_max_aligned16.h b/volk/include/volk/volk_32f_max_aligned16.h
index 96aafb2bf..d4e30fba8 100644
--- a/volk/include/volk/volk_32f_max_aligned16.h
+++ b/volk/include/volk/volk_32f_max_aligned16.h
@@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be checked
+ \param bVector The vector to be checked
+ \param num_points The number of values in aVector and bVector to be checked and stored into cVector
+*/
+extern void volk_32f_max_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_max_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_max_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32f_min_aligned16.h b/volk/include/volk/volk_32f_min_aligned16.h
index e247f4213..55daafb6a 100644
--- a/volk/include/volk/volk_32f_min_aligned16.h
+++ b/volk/include/volk/volk_32f_min_aligned16.h
@@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be checked
+ \param bVector The vector to be checked
+ \param num_points The number of values in aVector and bVector to be checked and stored into cVector
+*/
+extern void volk_32f_min_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_min_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_min_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
#endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32f_multiply_aligned16.h b/volk/include/volk/volk_32f_multiply_aligned16.h
index b557580ab..87ae7bcf8 100644
--- a/volk/include/volk/volk_32f_multiply_aligned16.h
+++ b/volk/include/volk/volk_32f_multiply_aligned16.h
@@ -63,7 +63,19 @@ static inline void volk_32f_multiply_aligned16_generic(float* cVector, const flo
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Multiplies the two input vectors and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+extern void volk_32f_multiply_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_multiply_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_multiply_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32f_sqrt_aligned16.h b/volk/include/volk/volk_32f_sqrt_aligned16.h
index 0b2eaf251..f6996ad5f 100644
--- a/volk/include/volk/volk_32f_sqrt_aligned16.h
+++ b/volk/include/volk/volk_32f_sqrt_aligned16.h
@@ -58,6 +58,19 @@ static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float*
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+extern void volk_32f_sqrt_aligned16_orc_impl(float *, const float*, unsigned int);
+/*!
+ \brief Computes the square root of each element of the input vector and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector of values to take the square root of
+ \param num_points The number of values in aVector to be square-rooted and stored into cVector
+*/
+static inline void volk_32f_sqrt_aligned16_orc(float* cVector, const float* aVector, unsigned int num_points){
+ volk_32f_sqrt_aligned16_orc_impl(cVector, aVector, num_points);
+}
+
+#endif /* LV_HAVE_ORC */
diff --git a/volk/include/volk/volk_32f_subtract_aligned16.h b/volk/include/volk/volk_32f_subtract_aligned16.h
index ac3f5e5d1..e15242901 100644
--- a/volk/include/volk/volk_32f_subtract_aligned16.h
+++ b/volk/include/volk/volk_32f_subtract_aligned16.h
@@ -63,5 +63,19 @@ static inline void volk_32f_subtract_aligned16_generic(float* cVector, const flo
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+/*!
+ \brief Subtracts bVector from aVector and stores the results in cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The initial vector
+ \param bVector The vector to be subtracted
+ \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector
+*/
+extern void volk_32f_subtract_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_32f_subtract_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_32f_subtract_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
#endif /* INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h b/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h
index 4e64d8c22..4e590e120 100644
--- a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h
+++ b/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h
@@ -140,7 +140,19 @@ static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeV
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param scalar The scale value multiplied to the magnitude of each complex vector
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+*/
+extern void volk_32fc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points);
+static inline void volk_32fc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
+ volk_32fc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h b/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h
index 7a8fd1ef9..3ea62da6a 100644
--- a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h
+++ b/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h
@@ -115,7 +115,18 @@ static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVec
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+ /*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+extern void volk_32fc_magnitude_32f_aligned16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points);
+static inline void volk_32fc_magnitude_32f_aligned16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ volk_32fc_magnitude_32f_aligned16_orc_impl(magnitudeVector, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32s_and_aligned16.h b/volk/include/volk/volk_32s_and_aligned16.h
index e9f1e3a43..16c63fd48 100644
--- a/volk/include/volk/volk_32s_and_aligned16.h
+++ b/volk/include/volk/volk_32s_and_aligned16.h
@@ -63,7 +63,19 @@ static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Ands the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors
+ \param bVector One of the vectors
+ \param num_points The number of values in aVector and bVector to be anded together and stored into cVector
+*/
+extern void volk_32s_and_aligned16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
+static inline void volk_32s_and_aligned16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+ volk_32s_and_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_32s_AND_ALIGNED16_H */
diff --git a/volk/include/volk/volk_32s_or_aligned16.h b/volk/include/volk/volk_32s_or_aligned16.h
index f4c427c4d..64748d535 100644
--- a/volk/include/volk/volk_32s_or_aligned16.h
+++ b/volk/include/volk/volk_32s_or_aligned16.h
@@ -63,7 +63,19 @@ static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t
}
#endif /* LV_HAVE_GENERIC */
-
+#if LV_HAVE_ORC
+/*!
+ \brief Ors the two input vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be ored
+ \param bVector One of the vectors to be ored
+ \param num_points The number of values in aVector and bVector to be ored together and stored into cVector
+*/
+extern void volk_32s_or_aligned16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points);
+static inline void volk_32s_or_aligned16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){
+ volk_32s_or_aligned16_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_VOLK_32s_OR_ALIGNED16_H */
diff --git a/volk/include/volk/volk_8s_convert_16s_aligned16.h b/volk/include/volk/volk_8s_convert_16s_aligned16.h
index 0efe3c6a1..c52c64eae 100644
--- a/volk/include/volk/volk_8s_convert_16s_aligned16.h
+++ b/volk/include/volk/volk_8s_convert_16s_aligned16.h
@@ -65,6 +65,18 @@ static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector,
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+ /*!
+ \brief Converts the input 8 bit integer data into 16 bit integer data
+ \param outputVector The 16 bit output data buffer
+ \param inputVector The 8 bit input data buffer
+ \param num_points The number of data values to be converted
+ */
+extern void volk_8s_convert_16s_aligned16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points);
+static inline void volk_8s_convert_16s_aligned16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
+ volk_8s_convert_16s_aligned16_orc_impl(outputVector, inputVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
diff --git a/volk/include/volk/volk_8s_convert_32f_aligned16.h b/volk/include/volk/volk_8s_convert_32f_aligned16.h
index 54b66ef8f..700a0fa42 100644
--- a/volk/include/volk/volk_8s_convert_32f_aligned16.h
+++ b/volk/include/volk/volk_8s_convert_32f_aligned16.h
@@ -86,6 +86,19 @@ static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, co
}
#endif /* LV_HAVE_GENERIC */
+#if LV_HAVE_ORC
+ /*!
+ \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param outputVector The floating point output data buffer
+ \param inputVector The 8 bit input data buffer
+ \param scalar The value by which each point in the output buffer is divided
+ \param num_points The number of data values to be converted
+ */
+extern void volk_8s_convert_32f_aligned16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points);
+static inline void volk_8s_convert_32f_aligned16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
+ volk_8s_convert_32f_aligned16_orc_impl(outputVector, inputVector, scalar, num_points);
+}
+#endif /* LV_HAVE_ORC */
diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 814d438fd..253033461 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -45,7 +45,9 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
# list of programs run by "make check" and "make distcheck"
#TESTS = test_all
-
+# ORC stuff gets built in the ORC directory, conditional on ORC being enabled.
+# It gets linked in during the build of libvolk as an added library.
+# There might be a better way to do this.
lib_LTLIBRARIES = \
libvolk.la \
@@ -131,13 +133,22 @@ libvolk_runtime_la_SOURCES = \
$(universal_runtime_CODE)
endif
-
-
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+volk_orc_LDFLAGS = \
+ $(ORC_LDFLAGS) \
+ -lorc-0.4
+
+volk_orc_LIBADD = \
+ ../orc/libvolk_orc.la
+
+if LV_HAVE_ORC
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_la_LIBADD = $(volk_orc_LIBADD)
+else
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-
libvolk_la_LIBADD =
-
+endif
# ----------------------------------------------------------------
@@ -233,7 +244,7 @@ libvolk_qa_la_SOURCES = \
qa_32f_stddev_aligned16.cc \
qa_32f_stddev_and_mean_aligned16.cc
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
libvolk_qa_la_LIBADD = \
libvolk.la \
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
index c775e8596..aadc39067 100644
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -27,6 +27,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
@@ -45,6 +47,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
}
end = clock();
@@ -71,6 +80,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]);
+
+ CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
+ CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]);
}
}
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
index b25094e89..13151be13 100644
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -27,6 +27,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
float output_generic1[vlen] __attribute__ ((aligned (16)));
float output_sse2[vlen] __attribute__ ((aligned (16)));
float output_sse21[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
+ float output_orc1[vlen] __attribute__ ((aligned (16)));
int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
@@ -43,6 +45,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
}
end = clock();
@@ -58,6 +67,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4);
}
}
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
index dd446567e..803caaa2d 100644
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -25,6 +25,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
int8_t output_generic[vlen] __attribute__ ((aligned (16)));
int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+ int8_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t* loadInput = (int16_t*)input0;
for(int i = 0; i < vlen*2; ++i) {
@@ -41,6 +42,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
}
end = clock();
@@ -55,6 +63,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+ CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index 9799ef43b..7fbdd8620 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t output_sse[vlen] __attribute__ ((aligned (16)));
int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -42,6 +43,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
}
end = clock();
@@ -65,6 +74,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index 1ebe644c5..54cc2ba6e 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -16,6 +16,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
float output_generic[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
float output_known[vlen] __attribute__ ((aligned (16)));
int16_t* inputLoad = (int16_t*)input0;
@@ -38,6 +39,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
+
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
/*
for(int i = 0; i < 100; ++i) {
@@ -49,6 +58,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4);
}
}
@@ -64,6 +74,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
float output_generic[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
float output_sse[vlen] __attribute__ ((aligned (16)));
float output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -80,6 +91,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
+/* start = clock();
+ for(int count = 0; count < ITERS; ++count) {
+ volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+*/
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -105,6 +124,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}
diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
index ea117a820..c2295968b 100644
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -25,11 +25,13 @@ void qa_16u_byteswap_aligned16::t1() {
uint16_t output0[vlen] __attribute__ ((aligned (16)));
uint16_t output01[vlen] __attribute__ ((aligned (16)));
+ uint16_t output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
}
memcpy(output01, output0, vlen*sizeof(uint16_t));
+ memcpy(output02, output0, vlen*sizeof(uint16_t));
printf("16u_byteswap_aligned\n");
@@ -42,6 +44,13 @@ void qa_16u_byteswap_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_16u_byteswap_aligned16_manual(output02, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
}
end = clock();
@@ -55,6 +64,7 @@ void qa_16u_byteswap_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
index f80d562d4..a183d4d85 100644
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ b/volk/lib/qa_32f_add_aligned16.cc
@@ -79,6 +79,7 @@ void qa_32f_add_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -95,6 +96,13 @@ void qa_32f_add_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -108,6 +116,7 @@ void qa_32f_add_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index 3257a3751..f2a1b9e7f 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -36,6 +36,7 @@ void qa_32f_divide_aligned16::t1() {
float input1[vlen] __attribute__ ((aligned (16)));
float output0[vlen] __attribute__ ((aligned (16)));
+ float output1[vlen] __attribute__ ((aligned (16)));
float output_known[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
@@ -52,6 +53,14 @@ void qa_32f_divide_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
+
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
+ volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
/*
for(int i = 0; i < 10; ++i) {
@@ -62,6 +71,7 @@ void qa_32f_divide_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
+ CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]);
}
}
@@ -79,6 +89,7 @@ void qa_32f_divide_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -95,6 +106,13 @@ void qa_32f_divide_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -108,6 +126,7 @@ void qa_32f_divide_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
index ceb913cb4..98f8ce9bc 100644
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -26,6 +26,7 @@ void qa_32f_max_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -42,6 +43,13 @@ void qa_32f_max_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -55,6 +63,7 @@ void qa_32f_max_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
index 580a60e7d..798b47c53 100644
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -26,6 +26,7 @@ void qa_32f_min_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -42,6 +43,13 @@ void qa_32f_min_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -55,6 +63,7 @@ void qa_32f_min_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
index 0c242b649..aa17cd62e 100644
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32f_multiply_aligned16.cc
@@ -79,6 +79,7 @@ void qa_32f_multiply_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -95,6 +96,13 @@ void qa_32f_multiply_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -108,6 +116,7 @@ void qa_32f_multiply_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
index 62d55767a..c216ce5d5 100644
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ b/volk/lib/qa_32f_sqrt_aligned16.cc
@@ -53,6 +53,14 @@ void qa_32f_sqrt_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
+ volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+
/*
for(int i = 0; i < 10; ++i) {
printf("inputs: %f\n", input0[i]);
@@ -94,6 +102,13 @@ void qa_32f_sqrt_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
}
end = clock();
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
index ffe4b504c..1e2210203 100644
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ b/volk/lib/qa_32f_subtract_aligned16.cc
@@ -26,6 +26,7 @@ void qa_32f_subtract_aligned16::t1() {
float output0[vlen] __attribute__ ((aligned (16)));
float output01[vlen] __attribute__ ((aligned (16)));
+ float output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -42,6 +43,13 @@ void qa_32f_subtract_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -55,6 +63,7 @@ void qa_32f_subtract_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index 16984e30d..c718b6b71 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -24,6 +24,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+ int16_t output_orc[vlen] __attribute__ ((aligned (16)));
int16_t output_sse[vlen] __attribute__ ((aligned (16)));
int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -42,6 +43,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
}
end = clock();
@@ -57,14 +65,15 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
printf("sse3_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
- //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
- //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+ // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+ // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
}
}
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
index b99f1ddcf..1d475fb86 100644
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
@@ -24,6 +24,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
float output_generic[vlen] __attribute__ ((aligned (16)));
+ float output_orc[vlen] __attribute__ ((aligned (16)));
float output_sse[vlen] __attribute__ ((aligned (16)));
float output_sse3[vlen] __attribute__ ((aligned (16)));
@@ -42,6 +43,13 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
}
end = clock();
@@ -65,6 +73,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
}
}
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
index 661801709..d20682147 100644
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ b/volk/lib/qa_32s_and_aligned16.cc
@@ -26,6 +26,7 @@ void qa_32s_and_aligned16::t1() {
int32_t output0[vlen] __attribute__ ((aligned (16)));
int32_t output01[vlen] __attribute__ ((aligned (16)));
+ int32_t output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -42,6 +43,13 @@ void qa_32s_and_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -55,6 +63,7 @@ void qa_32s_and_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
index 9da2ae344..bebf779b0 100644
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ b/volk/lib/qa_32s_or_aligned16.cc
@@ -26,6 +26,7 @@ void qa_32s_or_aligned16::t1() {
int32_t output0[vlen] __attribute__ ((aligned (16)));
int32_t output01[vlen] __attribute__ ((aligned (16)));
+ int32_t output02[vlen] __attribute__ ((aligned (16)));
for(int i = 0; i < vlen; ++i) {
input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -42,6 +43,13 @@ void qa_32s_or_aligned16::t1() {
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
+ volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
}
end = clock();
@@ -55,6 +63,7 @@ void qa_32s_or_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+ CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
}
}
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
index 672f5662f..f27e60552 100644
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ b/volk/lib/qa_8s_convert_32f_aligned16.cc
@@ -41,6 +41,14 @@ void qa_8s_convert_32f_aligned16::t1() {
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
+
+ start = clock();
+ for(int count = 0; count < ITERS; ++count) {
+ volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc");
+ }
+ end = clock();
+ total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+ printf("orc_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am
new file mode 100644
index 000000000..a469638c1
--- /dev/null
+++ b/volk/orc/Makefile.am
@@ -0,0 +1,57 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(ORC_CFLAGS)
+
+include $(top_srcdir)/Makefile.common
+
+# Library of kernels generated from the ORC programs listed below.
+lib_LTLIBRARIES = libvolk_orc.la
+libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS)
+
+# Each .orc source is first turned into a .c file by the .orc.c suffix
+# rule at the end of this file.
+libvolk_orc_la_SOURCES = \
+volk_8s_convert_16s_aligned16_orc_impl.orc \
+volk_8s_convert_32f_aligned16_orc_impl.orc \
+volk_16u_byteswap_aligned16_orc_impl.orc \
+volk_32s_and_aligned16_orc_impl.orc \
+volk_32s_or_aligned16_orc_impl.orc \
+volk_32f_add_aligned16_orc_impl.orc \
+volk_32f_subtract_aligned16_orc_impl.orc \
+volk_32f_divide_aligned16_orc_impl.orc \
+volk_32f_multiply_aligned16_orc_impl.orc \
+volk_32f_sqrt_aligned16_orc_impl.orc \
+volk_32f_max_aligned16_orc_impl.orc \
+volk_32f_min_aligned16_orc_impl.orc \
+volk_32fc_magnitude_32f_aligned16_orc_impl.orc \
+volk_32fc_magnitude_16s_aligned16_orc_impl.orc \
+volk_16sc_magnitude_16s_aligned16_orc_impl.orc \
+volk_16sc_deinterleave_16s_aligned16_orc_impl.orc \
+volk_16sc_deinterleave_32f_aligned16_orc_impl.orc \
+volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
+
+# Flags for orcc: --implementation plus whatever $(ORCC_FLAGS) supplies.
+my_ORCC_FLAGS = --implementation $(ORCC_FLAGS)
+
+# NOTE(review): the generated .c files are not listed in CLEANFILES here;
+# confirm whether Makefile.common or a parent Makefile removes them.
+.orc.c:
+	$(ORCC) $(my_ORCC_FLAGS) -o $@ $<
diff --git a/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..d226064a7
--- /dev/null
+++ b/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc
@@ -0,0 +1,9 @@
+# Split each 32-bit complex sample (two 16-bit words) into separate
+# I and Q output streams.
+.function volk_16sc_deinterleave_16s_aligned16_orc_impl
+.dest 2 idst
+.dest 2 qdst
+.source 4 src
+
+# splitlw: the first destination gets the high word, the second the low word.
+splitlw qdst, idst, src
diff --git a/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc
new file mode 100644
index 000000000..dddf682ca
--- /dev/null
+++ b/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc
@@ -0,0 +1,19 @@
+# Deinterleave 16-bit complex samples into two float streams,
+# dividing both components by `scalar`.
+.function volk_16sc_deinterleave_32f_aligned16_orc_impl
+.dest 4 idst
+.dest 4 qdst
+.source 4 src
+.floatparam 4 scalar
+.temp 8 widel
+.temp 8 widef
+
+# The x2 prefix applies the opcode to both halves of the packed pair.
+# Sign-extend each 16-bit component to 32 bits.
+x2 convswl widel, src
+# 32-bit integer -> float.
+x2 convlf widef, widel
+# Divide both components by the scale factor.
+x2 divf widef, widef, scalar
+# High half goes to qdst, low half to idst.
+splitql qdst, idst, widef
diff --git a/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..609750096
--- /dev/null
+++ b/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc
@@ -0,0 +1,11 @@
+# Keep only the first 16-bit component of each complex sample and
+# reduce it to 8 bits by taking its high byte.
+.function volk_16sc_deinterleave_real_8s_aligned16_orc_impl
+.dest 1 dst
+.source 4 src
+.temp 2 realw
+
+# select0lw: take the first word of the 32-bit sample.
+select0lw realw, src
+# convhwb: keep the high byte of that word.
+convhwb dst, realw
diff --git a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..088f56312
--- /dev/null
+++ b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc
@@ -0,0 +1,32 @@
+# Magnitude of 16-bit complex samples, written as 16-bit integers.
+# Components are divided by `scalar` before squaring and the root is
+# multiplied by `scalar` again before the conversion back to integer.
+.function volk_16sc_magnitude_16s_aligned16_orc_impl
+.source 4 src
+.dest 2 dst
+.floatparam 4 scalar
+.temp 8 pairl
+.temp 8 pairf
+.temp 8 sqpair
+.temp 4 qsq
+.temp 4 isq
+.temp 4 sumsq
+.temp 4 magf
+.temp 4 magl
+
+# Widen both 16-bit components to 32-bit ints, then to floats.
+x2 convswl pairl, src
+x2 convlf pairf, pairl
+# Normalise, then square each component.
+x2 divf pairf, pairf, scalar
+x2 mulf sqpair, pairf, pairf
+# Separate the two squares and add them.
+splitql qsq, isq, sqpair
+addf sumsq, isq, qsq
+# Root, rescale, and convert back to a 32-bit integer.
+sqrtf magf, sumsq
+mulf magf, magf, scalar
+convfl magl, magf
+# NOTE(review): convlw narrows without saturating, whereas the 32fc
+# variant of this kernel uses convssslw -- confirm that is intended.
+convlw dst, magl
diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
new file mode 100644
index 000000000..6d2ed8197
--- /dev/null
+++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
@@ -0,0 +1,30 @@
+# Magnitude of 16-bit complex samples as floats, with each component
+# divided by `scalar` before squaring.
+.function volk_16sc_magnitude_32f_aligned16_orc_impl
+.source 4 src
+.dest 4 dst
+.floatparam 4 scalar
+.temp 2 wa
+.temp 2 wb
+.temp 4 la
+.temp 4 lb
+.temp 4 fa
+.temp 4 fb
+.temp 4 sumsq
+
+# Split the sample into its two 16-bit halves.  Which half is I and
+# which is Q is irrelevant here: the sum of squares is symmetric.
+splitlw wa, wb, src
+# Widen to 32-bit ints, then convert to float.
+convswl la, wa
+convswl lb, wb
+convlf fa, la
+convlf fb, lb
+# Normalise and square each half.
+divf fa, fa, scalar
+divf fb, fb, scalar
+mulf fa, fa, fa
+mulf fb, fb, fb
+# Sum of squares, then root.
+addf sumsq, fa, fb
+sqrtf dst, sumsq
diff --git a/volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc b/volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc
new file mode 100644
index 000000000..3ffd12ec0
--- /dev/null
+++ b/volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+# Byte-swap every 16-bit word of dst in place (dst is both input and output).
+.function volk_16u_byteswap_aligned16_orc_impl
+.dest 2 dst
+
+swapw dst, dst
diff --git a/volk/orc/volk_32f_add_aligned16_orc_impl.orc b/volk/orc/volk_32f_add_aligned16_orc_impl.orc
new file mode 100644
index 000000000..20e000f68
--- /dev/null
+++ b/volk/orc/volk_32f_add_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_add_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+addf dst, src1, src2
diff --git a/volk/orc/volk_32f_divide_aligned16_orc_impl.orc b/volk/orc/volk_32f_divide_aligned16_orc_impl.orc
new file mode 100644
index 000000000..870843f2a
--- /dev/null
+++ b/volk/orc/volk_32f_divide_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_divide_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+divf dst, src1, src2
diff --git a/volk/orc/volk_32f_max_aligned16_orc_impl.orc b/volk/orc/volk_32f_max_aligned16_orc_impl.orc
new file mode 100644
index 000000000..97f48ba4a
--- /dev/null
+++ b/volk/orc/volk_32f_max_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_max_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+maxf dst, src1, src2
diff --git a/volk/orc/volk_32f_min_aligned16_orc_impl.orc b/volk/orc/volk_32f_min_aligned16_orc_impl.orc
new file mode 100644
index 000000000..a597933de
--- /dev/null
+++ b/volk/orc/volk_32f_min_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_min_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+minf dst, src1, src2
diff --git a/volk/orc/volk_32f_multiply_aligned16_orc_impl.orc b/volk/orc/volk_32f_multiply_aligned16_orc_impl.orc
new file mode 100644
index 000000000..23619af4e
--- /dev/null
+++ b/volk/orc/volk_32f_multiply_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_multiply_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+mulf dst, src1, src2
diff --git a/volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc b/volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc
new file mode 100644
index 000000000..0983271db
--- /dev/null
+++ b/volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc
@@ -0,0 +1,4 @@
+.function volk_32f_sqrt_aligned16_orc_impl
+.source 4 src
+.dest 4 dst
+sqrtf dst, src
diff --git a/volk/orc/volk_32f_subtract_aligned16_orc_impl.orc b/volk/orc/volk_32f_subtract_aligned16_orc_impl.orc
new file mode 100644
index 000000000..17dbcad46
--- /dev/null
+++ b/volk/orc/volk_32f_subtract_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32f_subtract_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+subf dst, src1, src2
diff --git a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..15f8fdff0
--- /dev/null
+++ b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc
@@ -0,0 +1,30 @@
+# Magnitude of complex-float samples, multiplied by `scalar` and
+# written as saturated 16-bit integers.
+.function volk_32fc_magnitude_16s_aligned16_orc_impl
+.source 8 src
+.dest 2 dst
+.floatparam 4 scalar
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
+.temp 4 sumf
+.temp 4 rootf
+.temp 4 rootl
+.temp 4 maskl
+
+# Square both float components at once, then add the two squares.
+x2 mulf prodiqf, src, src
+splitql qf, if, prodiqf
+addf sumf, if, qf
+# Root and scale.
+sqrtf rootf, sumf
+mulf rootf, rootf, scalar
+# Set the sign bit of rootf wherever scalar < rootf.
+# NOTE(review): presumably this pins over-range results to a fixed value
+# before the conversion -- confirm against the generic kernel.
+cmpltf maskl, scalar, rootf
+andl maskl, maskl, 0x80000000
+orl rootf, rootf, maskl
+# Float -> 32-bit int, then saturating narrow to 16 bits.
+convfl rootl, rootf
+convssslw dst, rootl
diff --git a/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc
new file mode 100644
index 000000000..47a10531d
--- /dev/null
+++ b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc
@@ -0,0 +1,14 @@
+# Magnitude of complex-float samples: root of the sum of squared components.
+.function volk_32fc_magnitude_32f_aligned16_orc_impl
+.source 8 src
+.dest 4 dst
+.temp 8 prodiqf
+.temp 4 qf
+.temp 4 if
+.temp 4 sumf
+
+# Square both components at once, split the squares, add, take the root.
+x2 mulf prodiqf, src, src
+splitql qf, if, prodiqf
+addf sumf, if, qf
+sqrtf dst, sumf
diff --git a/volk/orc/volk_32s_and_aligned16_orc_impl.orc b/volk/orc/volk_32s_and_aligned16_orc_impl.orc
new file mode 100644
index 000000000..9d3c7b733
--- /dev/null
+++ b/volk/orc/volk_32s_and_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32s_and_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+andl dst, src1, src2
diff --git a/volk/orc/volk_32s_or_aligned16_orc_impl.orc b/volk/orc/volk_32s_or_aligned16_orc_impl.orc
new file mode 100644
index 000000000..6d2a3859a
--- /dev/null
+++ b/volk/orc/volk_32s_or_aligned16_orc_impl.orc
@@ -0,0 +1,5 @@
+.function volk_32s_or_aligned16_orc_impl
+.dest 4 dst
+.source 4 src1
+.source 4 src2
+orl dst, src1, src2
diff --git a/volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc b/volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc
new file mode 100644
index 000000000..8322b529a
--- /dev/null
+++ b/volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc
@@ -0,0 +1,11 @@
+# Widen signed 8-bit samples to 16 bits and scale them by 256.
+# NOTE(review): the factor 256 is assumed to match the generic kernel -- confirm.
+.function volk_8s_convert_16s_aligned16_orc_impl
+.source 1 src
+.dest 2 dst
+.temp 2 widew
+
+# A byte-sized multiplier cannot hold 256 (255 is -1 as a signed byte),
+# so sign-extend first and shift left by 8 instead of using mulsbw.
+convsbw widew, src
+shlw dst, widew, 8
diff --git a/volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc b/volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc
new file mode 100644
index 000000000..91a0084d7
--- /dev/null
+++ b/volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc
@@ -0,0 +1,15 @@
+# Convert signed 8-bit samples to float and multiply by `scalar`.
+.function volk_8s_convert_32f_aligned16_orc_impl
+.source 1 src
+.dest 4 dst
+.floatparam 4 scalar
+.temp 2 wsrc
+.temp 4 lsrc
+.temp 4 flsrc
+
+# Input elements are one byte wide, so sign-extend byte -> word -> long
+# before the integer-to-float conversion.
+convsbw wsrc, src
+convswl lsrc, wsrc
+convlf flsrc, lsrc
+mulf dst, flsrc, scalar
diff --git a/volk/volk.pc.in b/volk/volk.pc.in
index a24298856..b03dbdada 100644
--- a/volk/volk.pc.in
+++ b/volk/volk.pc.in
@@ -10,6 +10,6 @@ Name: volk
Description: VOLK.. Vector Optimized Library of Kernels
Requires:
Version: @VERSION@
-Libs: -lvolk -lvolk_runtime
+Libs: -lvolk -lvolk_runtime -lvolk_orc
Cflags: -I${includedir} ${LV_CXXFLAGS}