diff options
67 files changed, 820 insertions, 28 deletions
diff --git a/volk/Makefile.am b/volk/Makefile.am index 3521dd0e4..271d495cd 100644 --- a/volk/Makefile.am +++ b/volk/Makefile.am @@ -24,7 +24,12 @@ ACLOCAL_AMFLAGS = -I config include $(top_srcdir)/Makefile.common EXTRA_DIST = bootstrap configure config.h.in volk_config.h -SUBDIRS = config include lib +SUBDIRS = config +if LV_HAVE_ORC +SUBDIRS += orc +endif +SUBDIRS += include lib + #if USE_PYTHON #SUBDIRS += python #endif @@ -53,4 +58,6 @@ distclean-local: -rm -f include/Makefile.in -rm -f lib/Makefile.in -rm -f python/Makefile.in - -rm -f configure
\ No newline at end of file + -rm -f configure + -rm -f orc/Makefile.in + -rm -f orc/*.c diff --git a/volk/config/Makefile.am b/volk/config/Makefile.am index 0e556c6e2..1d0041e35 100644 --- a/volk/config/Makefile.am +++ b/volk/config/Makefile.am @@ -45,6 +45,7 @@ m4macros = \ mkstemp.m4 \ onceonly.m4 \ pkg.m4 \ + orc.m4 \ gcc_version_workaround.m4 diff --git a/volk/config/lv_configure.m4 b/volk/config/lv_configure.m4 index c7a5fe960..f98b2dc5b 100644 --- a/volk/config/lv_configure.m4 +++ b/volk/config/lv_configure.m4 @@ -100,6 +100,9 @@ dnl AM_CONDITIONAL([USE_PYTHON], [test "$with_python" = yes]) GR_PWIN32 GR_LIBGNURADIO_CORE_EXTRA_LDFLAGS + dnl Check for liborc + ORC_CHECK + LDFLAGS="$LDFLAGS $LIBGNURADIO_CORE_EXTRA_LDFLAGS" AC_CHECK_PROG([XMLTO],[xmlto],[yes],[]) diff --git a/volk/config/orc.m4 b/volk/config/orc.m4 new file mode 100644 index 000000000..a4653400c --- /dev/null +++ b/volk/config/orc.m4 @@ -0,0 +1,61 @@ +dnl pkg-config-based checks for Orc + +dnl specific: +dnl ORC_CHECK([REQUIRED_VERSION]) + +AC_DEFUN([ORC_CHECK], +[ + ORC_REQ=ifelse([$1], , "0.4.10", [$1]) + + enable_orc = auto + if test "x$enable_orc" != "xno" ; then + PKG_CHECK_MODULES(ORC, orc-0.4 >= $ORC_REQ, [ + AC_DEFINE(HAVE_ORC, 1, [Use Orc]) + if test "x$ORCC" = "x" ; then + ORCC=`$PKG_CONFIG --variable=orcc orc-0.4` + fi + AC_SUBST(ORCC) + ORCC_FLAGS="--compat $ORC_REQ" + ORC_LDFLAGS=`$PKG_CONFIG --libs orc-0.4` + ORC_CFLAGS=`$PKG_CONFIG --cflags orc-0.4` + AC_SUBST(ORCC_FLAGS) + AC_SUBST(ORC_LDFLAGS) + AC_SUBST(ORC_CFLAGS) + HAVE_ORC=yes + HAVE_ORCC=yes + if test "x$cross_compiling" = "xyes" ; then + HAVE_ORCC=no + fi + ], [ + if test "x$enable_orc" = "xyes" ; then + AC_MSG_ERROR([--enable-orc specified, but Orc >= $ORC_REQ not found]) + fi + AC_DEFINE(DISABLE_ORC, 1, [Disable Orc]) + HAVE_ORC=no + HAVE_ORCC=no + ]) + else + AC_DEFINE(DISABLE_ORC, 1, [Disable Orc]) + HAVE_ORC=no + HAVE_ORCC=no + fi + AM_CONDITIONAL(HAVE_ORC, [test "x$HAVE_ORC" = "xyes"]) + AM_CONDITIONAL(HAVE_ORCC, 
[test "x$HAVE_ORCC" = "xyes"]) + +])) + +AC_DEFUN([ORC_OUTPUT], +[ + if test "$HAVE_ORC" = yes ; then + printf "configure: *** Orc acceleration enabled.\n" + else + if test "x$enable_orc" = "xno" ; then + printf "configure: *** Orc acceleration disabled by --disable-orc.\n" + else + printf "configure: *** Orc acceleration disabled. Requires Orc >= $ORC_REQ, which was\n" + printf " not found.\n" + fi + fi + printf "\n" +]) + diff --git a/volk/configure.ac b/volk/configure.ac index 8f17e5065..5a1eac3f2 100644 --- a/volk/configure.ac +++ b/volk/configure.ac @@ -78,6 +78,10 @@ AC_CONFIG_FILES([\ lib/Makefile \ volk.pc \ ]) + +if test "$HAVE_ORC" = yes; then + AC_CONFIG_FILES([orc/Makefile]) +fi AC_OUTPUT diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 04a43bd34..99276ab87 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -179,4 +179,4 @@ distclean-local: rm -f Makefile.in rm -f volk_environment_init.h rm -f volk_mktables - rm -f $(BUILT_SOURCES)
\ No newline at end of file + rm -f $(BUILT_SOURCES) diff --git a/volk/include/volk/archs.xml b/volk/include/volk/archs.xml index b7c98500f..a19a5add9 100644 --- a/volk/include/volk/archs.xml +++ b/volk/include/volk/archs.xml @@ -5,6 +5,12 @@ <flag>none</flag> </arch> +<arch name="orc" type="all"> + <flag>lorc-0.4</flag> + <overrule>LV_HAVE_ORC</overrule> + <overrule_val>no</overrule_val> +</arch> + <arch name="altivec" type="powerpc"> <flag>maltivec</flag> </arch> diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py index 842366b18..f2b7c0656 100644 --- a/volk/include/volk/make_set_simd.py +++ b/volk/include/volk/make_set_simd.py @@ -110,7 +110,7 @@ def make_set_simd(dom) : arch = str(domarch.attributes["name"].value); tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; tempstring = tempstring + " ADDONS=\"\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"generic\"\n"; + tempstring = tempstring + " BUILT_ARCHS=\"\"\n"; tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; @@ -180,8 +180,22 @@ def make_set_simd(dom) : tempstring = tempstring + " indCXX=no\n" tempstring = tempstring + " indLV_ARCH=no\n" elif atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" 
tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " ;;\n" tempstring = tempstring + " (powerpc)\n" @@ -225,14 +239,50 @@ def make_set_simd(dom) : tempstring = tempstring + " indCXX=no\n" tempstring = tempstring + " indLV_ARCH=no\n" elif atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; + tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " ;;\n" + tempstring = tempstring + " (*)\n" + for domarch in dom: + arch = str(domarch.attributes["name"].value); + atype = str(domarch.attributes["type"].value); + flag = domarch.getElementsByTagName("flag"); + flag = str(flag[0].firstChild.data); + if atype == "all": + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch 
+ "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" tempstring = tempstring + " ;;\n" tempstring = tempstring + " esac\n" tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" + tempstring = tempstring + " AM_CONDITIONAL(LV_HAVE_ORC, [test \"$LV_HAVE_ORC\" = \"yes\"])\n"; tempstring = tempstring + "])\n" return tempstring; - - + diff --git a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h index 32e13df98..cf94a3f38 100644 --- a/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h +++ b/volk/include/volk/volk_16sc_deinterleave_16s_aligned16.h @@ -140,7 +140,19 @@ static inline void volk_16sc_deinterleave_16s_aligned16_generic(int16_t* iBuffer } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
+ \brief Deinterleaves the complex 16 bit vector into I & Q vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param qBuffer The Q buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +extern void volk_16sc_deinterleave_16s_aligned16_orc_impl(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16sc_deinterleave_16s_aligned16_orc(int16_t* iBuffer, int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16sc_deinterleave_16s_aligned16_orc_impl(iBuffer, qBuffer, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_16S_ALIGNED16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h index 86f67437d..50b8b62d5 100644 --- a/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h +++ b/volk/include/volk/volk_16sc_deinterleave_32f_aligned16.h @@ -89,7 +89,20 @@ static inline void volk_16sc_deinterleave_32f_aligned16_generic(float* iBuffer, } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC + /*! 
+ \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param qBuffer The Q buffer output data + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex data values to be deinterleaved + */ +extern void volk_16sc_deinterleave_32f_aligned16_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16sc_deinterleave_32f_aligned16_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16sc_deinterleave_32f_aligned16_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_32F_ALIGNED16_H */ diff --git a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h b/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h index c0d1e941a..2dd85a422 100644 --- a/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h +++ b/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h @@ -77,7 +77,18 @@ static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuf } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
+ \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data + \param complexVector The complex input vector + \param iBuffer The I buffer output data + \param num_points The number of complex data values to be deinterleaved +*/ +extern void volk_16sc_deinterleave_real_8s_aligned16_orc_impl(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16sc_deinterleave_real_8s_aligned16_orc(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16sc_deinterleave_real_8s_aligned16_orc_impl(iBuffer, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h index 1482ab82e..41e8751d6 100644 --- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h @@ -164,7 +164,7 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; - const float scalar = 32786.0; + const float scalar = 32768.0; for(number = 0; number < num_points; number++){ float real = ((float)(*complexVectorPtr++)) / scalar; float imag = ((float)(*complexVectorPtr++)) / scalar; @@ -173,7 +173,18 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC_DISABLED +/*! 
 + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector +*/ +extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points); +static inline void volk_16sc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, 32768.0, num_points); +} +#endif /* LV_HAVE_ORC_DISABLED */ #endif /* INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h index 9c2a48835..c2605d551 100644 --- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h @@ -161,7 +161,19 @@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC_DISABLED +/*!
 + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector +*/ +extern void volk_16sc_magnitude_32f_aligned16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16sc_magnitude_32f_aligned16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16sc_magnitude_32f_aligned16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC_DISABLED */ #endif /* INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H */ diff --git a/volk/include/volk/volk_16u_byteswap_aligned16.h b/volk/include/volk/volk_16u_byteswap_aligned16.h index 698e958e4..9d19d1a45 100644 --- a/volk/include/volk/volk_16u_byteswap_aligned16.h +++ b/volk/include/volk/volk_16u_byteswap_aligned16.h @@ -61,5 +61,17 @@ static inline void volk_16u_byteswap_aligned16_generic(uint16_t* intsToSwap, uns } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Byteswaps (in-place) an aligned vector of uint16_t's.
+ \param intsToSwap The vector of data to byte swap + \param numDataPoints The number of data points +*/ +extern void volk_16u_byteswap_aligned16_orc_impl(uint16_t* intsToSwap, unsigned int num_points); +static inline void volk_16u_byteswap_aligned16_orc(uint16_t* intsToSwap, unsigned int num_points){ + volk_16u_byteswap_aligned16_orc_impl(intsToSwap, num_points); +} +#endif /* LV_HAVE_ORC */ + #endif /* INCLUDED_VOLK_16u_BYTESWAP_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32f_add_aligned16.h b/volk/include/volk/volk_32f_add_aligned16.h index 721c60fd6..e7d8de265 100644 --- a/volk/include/volk/volk_32f_add_aligned16.h +++ b/volk/include/volk/volk_32f_add_aligned16.h @@ -63,7 +63,19 @@ static inline void volk_32f_add_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! + \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +extern void volk_32f_add_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_add_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_add_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32f_ADD_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32f_divide_aligned16.h b/volk/include/volk/volk_32f_divide_aligned16.h index c00700cd8..c595b5e92 100644 --- a/volk/include/volk/volk_32f_divide_aligned16.h +++ b/volk/include/volk/volk_32f_divide_aligned16.h @@ -63,6 +63,19 @@ static inline void volk_32f_divide_aligned16_generic(float* cVector, const float } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
 + \brief Divides the two input vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector The vector to be divided + \param bVector The divisor vector + \param num_points The number of values in aVector and bVector to be divided together and stored into cVector +*/ +extern void volk_32f_divide_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_divide_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_divide_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_32f_max_aligned16.h b/volk/include/volk/volk_32f_max_aligned16.h index 96aafb2bf..d4e30fba8 100644 --- a/volk/include/volk/volk_32f_max_aligned16.h +++ b/volk/include/volk/volk_32f_max_aligned16.h @@ -67,5 +67,19 @@ static inline void volk_32f_max_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*!
+ \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The vector to be checked + \param bVector The vector to be checked + \param num_points The number of values in aVector and bVector to be checked and stored into cVector +*/ +extern void volk_32f_max_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_max_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_max_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + #endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32f_min_aligned16.h b/volk/include/volk/volk_32f_min_aligned16.h index e247f4213..55daafb6a 100644 --- a/volk/include/volk/volk_32f_min_aligned16.h +++ b/volk/include/volk/volk_32f_min_aligned16.h @@ -67,5 +67,19 @@ static inline void volk_32f_min_aligned16_generic(float* cVector, const float* a } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! 
+ \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector + \param cVector The vector where the results will be stored + \param aVector The vector to be checked + \param bVector The vector to be checked + \param num_points The number of values in aVector and bVector to be checked and stored into cVector +*/ +extern void volk_32f_min_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_min_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_min_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + #endif /* INCLUDED_VOLK_32f_MIN_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32f_multiply_aligned16.h b/volk/include/volk/volk_32f_multiply_aligned16.h index b557580ab..87ae7bcf8 100644 --- a/volk/include/volk/volk_32f_multiply_aligned16.h +++ b/volk/include/volk/volk_32f_multiply_aligned16.h @@ -63,7 +63,19 @@ static inline void volk_32f_multiply_aligned16_generic(float* cVector, const flo } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
 + \brief Multiplies the two input vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +extern void volk_32f_multiply_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_multiply_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_multiply_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32f_MULTIPLY_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32f_sqrt_aligned16.h b/volk/include/volk/volk_32f_sqrt_aligned16.h index 0b2eaf251..f6996ad5f 100644 --- a/volk/include/volk/volk_32f_sqrt_aligned16.h +++ b/volk/include/volk/volk_32f_sqrt_aligned16.h @@ -58,6 +58,19 @@ static inline void volk_32f_sqrt_aligned16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +extern void volk_32f_sqrt_aligned16_orc_impl(float *, const float*, unsigned int); +/*!
 + \brief Takes the square root of each value in the input vector and stores the results in the output vector + \param cVector The vector where the results will be stored + \param aVector The input vector whose values are to be square-rooted + \param num_points The number of values in aVector to be square-rooted and stored into cVector +*/ +static inline void volk_32f_sqrt_aligned16_orc(float* cVector, const float* aVector, unsigned int num_points){ + volk_32f_sqrt_aligned16_orc_impl(cVector, aVector, num_points); +} + +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_32f_subtract_aligned16.h b/volk/include/volk/volk_32f_subtract_aligned16.h index ac3f5e5d1..e15242901 100644 --- a/volk/include/volk/volk_32f_subtract_aligned16.h +++ b/volk/include/volk/volk_32f_subtract_aligned16.h @@ -63,5 +63,19 @@ static inline void volk_32f_subtract_aligned16_generic(float* cVector, const flo } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC +/*! + \brief Subtracts bVector from aVector and stores the results in the cVector + \param cVector The vector where the results will be stored + \param aVector The initial vector + \param bVector The vector to be subtracted + \param num_points The number of values in aVector and bVector to be subtracted together and stored into cVector +*/ +extern void volk_32f_subtract_aligned16_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_32f_subtract_aligned16_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + volk_32f_subtract_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + #endif /* INCLUDED_VOLK_32f_SUBTRACT_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h b/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h index 4e64d8c22..4e590e120 100644 --- a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h @@ -140,7 +140,19 @@ static
inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! + \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param scalar The scale value multiplied to the magnitude of each complex vector + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector +*/ +extern void volk_32fc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_32fc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ + volk_32fc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h b/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h index 7a8fd1ef9..3ea62da6a 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h @@ -115,7 +115,18 @@ static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC + /*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +extern void volk_32fc_magnitude_32f_aligned16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); +static inline void volk_32fc_magnitude_32f_aligned16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + volk_32fc_magnitude_32f_aligned16_orc_impl(magnitudeVector, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32s_and_aligned16.h b/volk/include/volk/volk_32s_and_aligned16.h index e9f1e3a43..16c63fd48 100644 --- a/volk/include/volk/volk_32s_and_aligned16.h +++ b/volk/include/volk/volk_32s_and_aligned16.h @@ -63,7 +63,19 @@ static inline void volk_32s_and_aligned16_generic(int32_t* cVector, const int32_ } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
+ \brief Ands the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors + \param bVector One of the vectors + \param num_points The number of values in aVector and bVector to be anded together and stored into cVector +*/ +extern void volk_32s_and_aligned16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32s_and_aligned16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32s_and_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32s_AND_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32s_or_aligned16.h b/volk/include/volk/volk_32s_or_aligned16.h index f4c427c4d..64748d535 100644 --- a/volk/include/volk/volk_32s_or_aligned16.h +++ b/volk/include/volk/volk_32s_or_aligned16.h @@ -63,7 +63,19 @@ static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
+ \brief Ors the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be ored + \param bVector One of the vectors to be ored + \param num_points The number of values in aVector and bVector to be ored together and stored into cVector +*/ +extern void volk_32s_or_aligned16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32s_or_aligned16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32s_or_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32s_OR_ALIGNED16_H */ diff --git a/volk/include/volk/volk_8s_convert_16s_aligned16.h b/volk/include/volk/volk_8s_convert_16s_aligned16.h index 0efe3c6a1..c52c64eae 100644 --- a/volk/include/volk/volk_8s_convert_16s_aligned16.h +++ b/volk/include/volk/volk_8s_convert_16s_aligned16.h @@ -65,6 +65,18 @@ static inline void volk_8s_convert_16s_aligned16_generic(int16_t* outputVector, } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Converts the input 8 bit integer data into 16 bit integer data + \param inputVector The 8 bit input data buffer + \param outputVector The 16 bit output data buffer + \param num_points The number of data values to be converted + */ +extern void volk_8s_convert_16s_aligned16_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points); +static inline void volk_8s_convert_16s_aligned16_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){ + volk_8s_convert_16s_aligned16_orc_impl(outputVector, inputVector, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/include/volk/volk_8s_convert_32f_aligned16.h b/volk/include/volk/volk_8s_convert_32f_aligned16.h index 54b66ef8f..700a0fa42 100644 --- a/volk/include/volk/volk_8s_convert_32f_aligned16.h +++ b/volk/include/volk/volk_8s_convert_32f_aligned16.h @@ -86,6 +86,19 @@ static inline void volk_8s_convert_32f_aligned16_generic(float* outputVector, co } #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_ORC + /*! 
+ \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 8 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + */ +extern void volk_8s_convert_32f_aligned16_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points); +static inline void volk_8s_convert_32f_aligned16_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){ + volk_8s_convert_32f_aligned16_orc_impl(outputVector, inputVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 814d438fd..253033461 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -45,7 +45,9 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ # list of programs run by "make check" and "make distcheck" #TESTS = test_all - +#orc stuff gets built in the ORC directory conditional to ORC being enabled. +#it gets linked in during the build of libvolk as an added library. +#there might be a better way to do this. 
lib_LTLIBRARIES = \ libvolk.la \ @@ -131,13 +133,22 @@ libvolk_runtime_la_SOURCES = \ $(universal_runtime_CODE) endif - - -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +volk_orc_LDFLAGS = \ + $(ORC_LDFLAGS) \ + -lorc-0.4 + +volk_orc_LIBADD = \ + ../orc/libvolk_orc.la + +if LV_HAVE_ORC +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_la_LIBADD = $(volk_orc_LIBADD) +else +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 - libvolk_la_LIBADD = - +endif # ---------------------------------------------------------------- @@ -233,7 +244,7 @@ libvolk_qa_la_SOURCES = \ qa_32f_stddev_aligned16.cc \ qa_32f_stddev_and_mean_aligned16.cc -libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 +libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 libvolk_qa_la_LIBADD = \ libvolk.la \ diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc index c775e8596..aadc39067 100644 --- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc @@ -27,6 +27,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { int16_t output_generic1[vlen] __attribute__ ((aligned (16))); int16_t output_sse2[vlen] __attribute__ ((aligned (16))); int16_t output_sse21[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); + int16_t output_orc1[vlen] __attribute__ ((aligned (16))); int16_t output_ssse3[vlen] __attribute__ ((aligned (16))); int16_t output_ssse31[vlen] __attribute__ ((aligned (16))); @@ -45,6 +47,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc"); + } + 
end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2"); } end = clock(); @@ -71,6 +80,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() { CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_ssse31[i]); + + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); + CPPUNIT_ASSERT_EQUAL(output_generic1[i], output_orc1[i]); } } diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc index b25094e89..13151be13 100644 --- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc @@ -27,6 +27,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { float output_generic1[vlen] __attribute__ ((aligned (16))); float output_sse2[vlen] __attribute__ ((aligned (16))); float output_sse21[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); + float output_orc1[vlen] __attribute__ ((aligned (16))); int16_t* loadInput = (int16_t*)input0; for(int i = 0; i < vlen*2; ++i) { @@ -43,6 +45,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse"); } end = clock(); @@ -58,6 +67,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], 
output_sse2[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_orc1[i], fabs(output_generic1[i])*1e-4); } } diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc index dd446567e..803caaa2d 100644 --- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc +++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc @@ -25,6 +25,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { int8_t output_generic[vlen] __attribute__ ((aligned (16))); int8_t output_ssse3[vlen] __attribute__ ((aligned (16))); + int8_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t* loadInput = (int16_t*)input0; for(int i = 0; i < vlen*2; ++i) { @@ -41,6 +42,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3"); } end = clock(); @@ -55,6 +63,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]); + CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]); } } diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index 9799ef43b..7fbdd8620 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -24,6 +24,7 @@ void 
qa_16sc_magnitude_16s_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -42,6 +43,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); } end = clock(); @@ -65,6 +74,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index 1ebe644c5..54cc2ba6e 100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -16,6 +16,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); int16_t* inputLoad = (int16_t*)input0; @@ -38,6 +39,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + 
volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 100; ++i) { @@ -49,6 +58,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4); } } @@ -64,6 +74,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -80,6 +91,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); +/* start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); +*/ start = clock(); for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); @@ -105,6 +124,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); +// CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc 
index ea117a820..c2295968b 100644 --- a/volk/lib/qa_16u_byteswap_aligned16.cc +++ b/volk/lib/qa_16u_byteswap_aligned16.cc @@ -25,11 +25,13 @@ void qa_16u_byteswap_aligned16::t1() { uint16_t output0[vlen] __attribute__ ((aligned (16))); uint16_t output01[vlen] __attribute__ ((aligned (16))); + uint16_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); } memcpy(output01, output0, vlen*sizeof(uint16_t)); + memcpy(output02, output0, vlen*sizeof(uint16_t)); printf("16u_byteswap_aligned\n"); @@ -42,6 +44,13 @@ void qa_16u_byteswap_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16u_byteswap_aligned16_manual(output02, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2"); } end = clock(); @@ -55,6 +64,7 @@ void qa_16u_byteswap_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc index f80d562d4..a183d4d85 100644 --- a/volk/lib/qa_32f_add_aligned16.cc +++ b/volk/lib/qa_32f_add_aligned16.cc @@ -79,6 +79,7 @@ void qa_32f_add_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -95,6 +96,13 @@ void qa_32f_add_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + 
volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -108,6 +116,7 @@ void qa_32f_add_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index 3257a3751..f2a1b9e7f 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -36,6 +36,7 @@ void qa_32f_divide_aligned16::t1() { float input1[vlen] __attribute__ ((aligned (16))); float output0[vlen] __attribute__ ((aligned (16))); + float output1[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { @@ -52,6 +53,14 @@ void qa_32f_divide_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 10; ++i) { @@ -62,6 +71,7 @@ void qa_32f_divide_aligned16::t1() { for(int i = 0; i < vlen; ++i) { CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]); + CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]); } } @@ -79,6 +89,7 @@ void qa_32f_divide_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - 
(RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -95,6 +106,13 @@ void qa_32f_divide_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -108,6 +126,7 @@ void qa_32f_divide_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc index ceb913cb4..98f8ce9bc 100644 --- a/volk/lib/qa_32f_max_aligned16.cc +++ b/volk/lib/qa_32f_max_aligned16.cc @@ -26,6 +26,7 @@ void qa_32f_max_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -42,6 +43,13 @@ void qa_32f_max_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -55,6 +63,7 @@ void qa_32f_max_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + 
CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc index 580a60e7d..798b47c53 100644 --- a/volk/lib/qa_32f_min_aligned16.cc +++ b/volk/lib/qa_32f_min_aligned16.cc @@ -26,6 +26,7 @@ void qa_32f_min_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -42,6 +43,13 @@ void qa_32f_min_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -55,6 +63,7 @@ void qa_32f_min_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc index 0c242b649..aa17cd62e 100644 --- a/volk/lib/qa_32f_multiply_aligned16.cc +++ b/volk/lib/qa_32f_multiply_aligned16.cc @@ -79,6 +79,7 @@ void qa_32f_multiply_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -95,6 +96,13 @@ void qa_32f_multiply_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + 
volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -108,6 +116,7 @@ void qa_32f_multiply_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc index 62d55767a..c216ce5d5 100644 --- a/volk/lib/qa_32f_sqrt_aligned16.cc +++ b/volk/lib/qa_32f_sqrt_aligned16.cc @@ -53,6 +53,14 @@ void qa_32f_sqrt_aligned16::t1() { total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + /* for(int i = 0; i < 10; ++i) { printf("inputs: %f\n", input0[i]); @@ -94,6 +102,13 @@ void qa_32f_sqrt_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse"); } end = clock(); diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc index ffe4b504c..1e2210203 100644 --- a/volk/lib/qa_32f_subtract_aligned16.cc +++ b/volk/lib/qa_32f_subtract_aligned16.cc @@ -26,6 +26,7 @@ void qa_32f_subtract_aligned16::t1() { float output0[vlen] 
__attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -42,6 +43,13 @@ void qa_32f_subtract_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -55,6 +63,7 @@ void qa_32f_subtract_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index 16984e30d..c718b6b71 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -24,6 +24,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { std::complex<float> input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -42,6 +43,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < 
ITERS; ++count) { volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); } end = clock(); @@ -57,14 +65,15 @@ void qa_32fc_magnitude_16s_aligned16::t1() { printf("sse3_time: %f\n", total); for(int i = 0; i < 1; ++i) { - //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); - //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + // printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag()); + // printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]); } for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc index b99f1ddcf..1d475fb86 100644 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc @@ -24,6 +24,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { std::complex<float> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -42,6 +43,13 @@ void qa_32fc_magnitude_32f_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); } end = clock(); @@ -65,6 +73,7 @@ void 
qa_32fc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc index 661801709..d20682147 100644 --- a/volk/lib/qa_32s_and_aligned16.cc +++ b/volk/lib/qa_32s_and_aligned16.cc @@ -26,6 +26,7 @@ void qa_32s_and_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -42,6 +43,13 @@ void qa_32s_and_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -55,6 +63,7 @@ void qa_32s_and_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc index 9da2ae344..bebf779b0 100644 --- a/volk/lib/qa_32s_or_aligned16.cc +++ b/volk/lib/qa_32s_or_aligned16.cc @@ -26,6 +26,7 @@ void qa_32s_or_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t output02[vlen] __attribute__ ((aligned (16))); 
for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -42,6 +43,13 @@ void qa_32s_or_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -55,6 +63,7 @@ void qa_32s_or_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc index 672f5662f..f27e60552 100644 --- a/volk/lib/qa_8s_convert_32f_aligned16.cc +++ b/volk/lib/qa_8s_convert_32f_aligned16.cc @@ -41,6 +41,14 @@ void qa_8s_convert_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am new file mode 100644 index 000000000..a469638c1 --- /dev/null +++ b/volk/orc/Makefile.am @@ -0,0 +1,52 @@ +# +# Copyright 2008 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(ORC_CFLAGS) + +include $(top_srcdir)/Makefile.common +lib_LTLIBRARIES = libvolk_orc.la +libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS) + +libvolk_orc_la_SOURCES = \ +volk_8s_convert_16s_aligned16_orc_impl.orc \ +volk_8s_convert_32f_aligned16_orc_impl.orc \ +volk_16u_byteswap_aligned16_orc_impl.orc \ +volk_32s_and_aligned16_orc_impl.orc \ +volk_32s_or_aligned16_orc_impl.orc \ +volk_32f_add_aligned16_orc_impl.orc \ +volk_32f_subtract_aligned16_orc_impl.orc \ +volk_32f_divide_aligned16_orc_impl.orc \ +volk_32f_multiply_aligned16_orc_impl.orc \ +volk_32f_sqrt_aligned16_orc_impl.orc \ +volk_32f_max_aligned16_orc_impl.orc \ +volk_32f_min_aligned16_orc_impl.orc \ +volk_32fc_magnitude_32f_aligned16_orc_impl.orc \ +volk_32fc_magnitude_16s_aligned16_orc_impl.orc \ +volk_16sc_magnitude_16s_aligned16_orc_impl.orc \ +volk_16sc_deinterleave_16s_aligned16_orc_impl.orc \ +volk_16sc_deinterleave_32f_aligned16_orc_impl.orc \ +volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc + + + +my_ORCC_FLAGS = --implementation $(ORCC_FLAGS) + +.orc.c: + $(ORCC) $(my_ORCC_FLAGS) -o $@ $< diff --git a/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc new file mode 100644 index 000000000..d226064a7 --- /dev/null +++ b/volk/orc/volk_16sc_deinterleave_16s_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_16sc_deinterleave_16s_aligned16_orc_impl +.dest 2 idst +.dest 2 qdst +.source 4 src +splitlw qdst, idst, src diff --git 
a/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..dddf682ca --- /dev/null +++ b/volk/orc/volk_16sc_deinterleave_32f_aligned16_orc_impl.orc @@ -0,0 +1,12 @@ +.function volk_16sc_deinterleave_32f_aligned16_orc_impl +.dest 4 idst +.dest 4 qdst +.source 4 src +.floatparam 4 scalar +.temp 8 iql +.temp 8 iqf + +x2 convswl iql, src +x2 convlf iqf, iql +x2 divf iqf, iqf, scalar +splitql qdst, idst, iqf diff --git a/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc new file mode 100644 index 000000000..609750096 --- /dev/null +++ b/volk/orc/volk_16sc_deinterleave_real_8s_aligned16_orc_impl.orc @@ -0,0 +1,6 @@ +.function volk_16sc_deinterleave_real_8s_aligned16_orc_impl +.dest 1 dst +.source 4 src +.temp 2 iw +select0lw iw, src +convhwb dst, iw diff --git a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc new file mode 100644 index 000000000..088f56312 --- /dev/null +++ b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc @@ -0,0 +1,23 @@ +.function volk_16sc_magnitude_16s_aligned16_orc_impl +.source 4 src +.dest 2 dst +.floatparam 4 scalar +.temp 8 iql +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if +.temp 4 sumf +.temp 4 rootf +.temp 4 rootl + +x2 convswl iql, src +x2 convlf iqf, iql +x2 divf iqf, iqf, scalar +x2 mulf prodiqf, iqf, iqf +splitql qf, if, prodiqf +addf sumf, if, qf +sqrtf rootf, sumf +mulf rootf, rootf, scalar +convfl rootl, rootf +convlw dst, rootl diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..6d2ed8197 --- /dev/null +++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc @@ -0,0 +1,25 @@ +.function volk_16sc_magnitude_32f_aligned16_orc_impl +.source 4 src +.dest 4 dst 
+.floatparam 4 scalar +.temp 4 reall +.temp 4 imagl +.temp 2 reals +.temp 2 imags +.temp 4 realf +.temp 4 imagf +.temp 4 sumf + + + +splitlw reals, imags, src +convswl reall, reals +convswl imagl, imags +convlf realf, reall +convlf imagf, imagl +divf realf, realf, scalar +divf imagf, imagf, scalar +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, imagf +sqrtf dst, sumf diff --git a/volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc b/volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc new file mode 100644 index 000000000..3ffd12ec0 --- /dev/null +++ b/volk/orc/volk_16u_byteswap_aligned16_orc_impl.orc @@ -0,0 +1,3 @@ +.function volk_16u_byteswap_aligned16_orc_impl +.dest 2 dst +swapw dst, dst diff --git a/volk/orc/volk_32f_add_aligned16_orc_impl.orc b/volk/orc/volk_32f_add_aligned16_orc_impl.orc new file mode 100644 index 000000000..20e000f68 --- /dev/null +++ b/volk/orc/volk_32f_add_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_add_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +addf dst, src1, src2 diff --git a/volk/orc/volk_32f_divide_aligned16_orc_impl.orc b/volk/orc/volk_32f_divide_aligned16_orc_impl.orc new file mode 100644 index 000000000..870843f2a --- /dev/null +++ b/volk/orc/volk_32f_divide_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_divide_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +divf dst, src1, src2 diff --git a/volk/orc/volk_32f_max_aligned16_orc_impl.orc b/volk/orc/volk_32f_max_aligned16_orc_impl.orc new file mode 100644 index 000000000..97f48ba4a --- /dev/null +++ b/volk/orc/volk_32f_max_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_max_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +maxf dst, src1, src2 diff --git a/volk/orc/volk_32f_min_aligned16_orc_impl.orc b/volk/orc/volk_32f_min_aligned16_orc_impl.orc new file mode 100644 index 000000000..a597933de --- /dev/null +++ b/volk/orc/volk_32f_min_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ 
+.function volk_32f_min_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +minf dst, src1, src2 diff --git a/volk/orc/volk_32f_multiply_aligned16_orc_impl.orc b/volk/orc/volk_32f_multiply_aligned16_orc_impl.orc new file mode 100644 index 000000000..23619af4e --- /dev/null +++ b/volk/orc/volk_32f_multiply_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_multiply_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +mulf dst, src1, src2 diff --git a/volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc b/volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc new file mode 100644 index 000000000..0983271db --- /dev/null +++ b/volk/orc/volk_32f_sqrt_aligned16_orc_impl.orc @@ -0,0 +1,4 @@ +.function volk_32f_sqrt_aligned16_orc_impl +.source 4 src +.dest 4 dst +sqrtf dst, src diff --git a/volk/orc/volk_32f_subtract_aligned16_orc_impl.orc b/volk/orc/volk_32f_subtract_aligned16_orc_impl.orc new file mode 100644 index 000000000..17dbcad46 --- /dev/null +++ b/volk/orc/volk_32f_subtract_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32f_subtract_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +subf dst, src1, src2 diff --git a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc new file mode 100644 index 000000000..15f8fdff0 --- /dev/null +++ b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc @@ -0,0 +1,23 @@ +.function volk_32fc_magnitude_16s_aligned16_orc_impl +.source 8 src +.dest 2 dst +.floatparam 4 scalar +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if +.temp 4 sumf +.temp 4 rootf +.temp 4 rootl +.temp 4 maskl + +x2 mulf prodiqf, src, src +splitql qf, if, prodiqf +addf sumf, if, qf +sqrtf rootf, sumf +mulf rootf, rootf, scalar +cmpltf maskl, scalar, rootf +andl maskl, maskl, 0x80000000 +orl rootf, rootf, maskl +convfl rootl, rootf +convssslw dst, rootl diff --git a/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc 
b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..47a10531d --- /dev/null +++ b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc @@ -0,0 +1,13 @@ +.function volk_32fc_magnitude_32f_aligned16_orc_impl +.source 8 src +.dest 4 dst +.temp 8 iqf +.temp 8 prodiqf +.temp 4 qf +.temp 4 if +.temp 4 sumf + +x2 mulf prodiqf, src, src +splitql qf, if, prodiqf +addf sumf, if, qf +sqrtf dst, sumf diff --git a/volk/orc/volk_32s_and_aligned16_orc_impl.orc b/volk/orc/volk_32s_and_aligned16_orc_impl.orc new file mode 100644 index 000000000..9d3c7b733 --- /dev/null +++ b/volk/orc/volk_32s_and_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32s_and_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +andl dst, src1, src2 diff --git a/volk/orc/volk_32s_or_aligned16_orc_impl.orc b/volk/orc/volk_32s_or_aligned16_orc_impl.orc new file mode 100644 index 000000000..6d2a3859a --- /dev/null +++ b/volk/orc/volk_32s_or_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32s_or_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +orl dst, src1, src2 diff --git a/volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc b/volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc new file mode 100644 index 000000000..8322b529a --- /dev/null +++ b/volk/orc/volk_8s_convert_16s_aligned16_orc_impl.orc @@ -0,0 +1,4 @@ +.function volk_8s_convert_16s_aligned16_orc_impl +.source 1 src +.dest 2 dst +mulsbw dst, src, 255 diff --git a/volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc b/volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..91a0084d7 --- /dev/null +++ b/volk/orc/volk_8s_convert_32f_aligned16_orc_impl.orc @@ -0,0 +1,9 @@ +.function volk_8s_convert_32f_aligned16_orc_impl +.source 2 src +.dest 4 dst +.floatparam 4 scalar +.temp 4 flsrc +.temp 4 lsrc +convswl lsrc, src +convlf flsrc, lsrc +mulf dst, flsrc, scalar diff --git a/volk/volk.pc.in b/volk/volk.pc.in index a24298856..b03dbdada 
100644 --- a/volk/volk.pc.in +++ b/volk/volk.pc.in @@ -10,6 +10,6 @@ Name: volk Description: VOLK.. Vector Optimized Library of Kernels Requires: Version: @VERSION@ -Libs: -lvolk -lvolk_runtime +Libs: -lvolk -lvolk_runtime -lvolk_orc Cflags: -I${includedir} ${LV_CXXFLAGS} |