diff options
159 files changed, 2892 insertions, 1879 deletions
diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt new file mode 100644 index 000000000..f333d939b --- /dev/null +++ b/volk/CMakeLists.txt @@ -0,0 +1,87 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Project setup +######################################################################## +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +IF(NOT DEFINED CMAKE_BUILD_TYPE) + SET(CMAKE_BUILD_TYPE Release) +ENDIF() +SET(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel") +PROJECT(volk) +ENABLE_LANGUAGE(CXX) +ENABLE_LANGUAGE(C) +ENABLE_TESTING() +SET(VERSION 0.1) +SET(LIBVER 0.0.0) + +######################################################################## +# Dependencies setup +######################################################################## +FIND_PACKAGE(PythonInterp) +IF(NOT PYTHONINTERP_FOUND) + MESSAGE(FATAL_ERROR "Python interpreter required by the build system.") +ENDIF(NOT PYTHONINTERP_FOUND) + +######################################################################## +# Setup the package config file +######################################################################## +#set variables found in the pc.in file +SET(prefix ${CMAKE_INSTALL_PREFIX}) +SET(exec_prefix "\${prefix}") 
+SET(libdir "\${exec_prefix}/lib${LIB_SUFFIX}") +SET(includedir "\${prefix}/include") + +CONFIGURE_FILE( + ${CMAKE_CURRENT_SOURCE_DIR}/volk.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/volk.pc +@ONLY) + +INSTALL( + FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc + DESTINATION lib${LIB_SUFFIX}/pkgconfig +) + +######################################################################## +# Install all headers in the include directories +######################################################################## +INSTALL( + DIRECTORY ${CMAKE_SOURCE_DIR}/include/volk + DESTINATION include FILES_MATCHING PATTERN "*.h" +) + +INSTALL(FILES + ${CMAKE_BINARY_DIR}/include/volk/volk.h + ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h + ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h +DESTINATION include/volk) + +######################################################################## +# Setup the library +######################################################################## +ADD_SUBDIRECTORY(lib) + +######################################################################## +# And the utility apps +######################################################################## +ADD_SUBDIRECTORY(apps) + +######################################################################## +# Print summary +######################################################################## +MESSAGE(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}") diff --git a/volk/Makefile.am b/volk/Makefile.am index 03c5aac35..829c37b78 100644 --- a/volk/Makefile.am +++ b/volk/Makefile.am @@ -23,7 +23,7 @@ ACLOCAL_AMFLAGS = -I config include $(top_srcdir)/Makefile.common -EXTRA_DIST = bootstrap configure config.h.in volk_config.h +EXTRA_DIST = bootstrap configure config.h.in SUBDIRS = config if LV_HAVE_ORC SUBDIRS += orc @@ -38,7 +38,6 @@ pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = volk.pc distclean-local: - -rm -f config/lv_set_simd_flags.m4 -rm -rf autom4te.cache -rm -f config.* -rm -f depcomp @@ -61,3 +60,6 @@ distclean-local: 
-rm -f configure -rm -f orc/Makefile.in -rm -f orc/*.c + -rm -rf gen/config + -rm -rf gen/include + -rm -rf gen/lib diff --git a/volk/Makefile.common b/volk/Makefile.common index eca2c6516..b9949cb33 100644 --- a/volk/Makefile.common +++ b/volk/Makefile.common @@ -20,21 +20,8 @@ # Boston, MA 02110-1301, USA. # -if MD_CPU_generic - platform_CODE = \ - $(top_srcdir)/lib/volk_cpu_generic.c -endif - -if MD_CPU_x86 - platform_CODE = \ - $(top_srcdir)/lib/volk_cpu_x86.c -endif - -if MD_CPU_powerpc - platform_CODE = \ - $(top_srcdir)/lib/volk_cpu_powerpc.c -endif - +#define gendir for files generated during bootstrap +top_gendir = $(top_srcdir)/gen ourincludedir = $(includedir)/volk diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt new file mode 100644 index 000000000..a0bf7e900 --- /dev/null +++ b/volk/apps/CMakeLists.txt @@ -0,0 +1,38 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +######################################################################## +# Setup profiler +######################################################################## +IF(MSVC) + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) +ENDIF(MSVC) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_SOURCE_DIR}/lib + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc +) + +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc new file mode 100644 index 000000000..fd9507207 --- /dev/null +++ b/volk/apps/volk_profile.cc @@ -0,0 +1,135 @@ +#include "qa_utils.h" +extern "C" { +#include <volk/volk.h> +#include <volk/volk_prefs.h> +} +#include <vector> +#include <boost/foreach.hpp> +#include <iostream> +#include <fstream> +#include <sys/stat.h> +#include <sys/types.h> + +int main(int argc, char *argv[]) { + + std::vector<std::string> results; + + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 
204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, 
&results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); + 
VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); + VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); + 
VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); + VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + char path[256]; + get_config_path(path); + std::string config_path(path); + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + if(!config.is_open()) { //either we don't have write access or we don't have the dir yet + std::string dir(getenv("HOME")); + dir += "/.volk"; + if(mkdir(dir.c_str(), 0777) == -1) { + std::cout << "Error creating directory " << dir << std::endl; + return -1; + } + config.open(config_path.c_str()); + if(!config.is_open()) { + std::cout << "Error opening file " << config_path << std::endl; + return -1; + } + } + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); +} diff --git a/volk/bootstrap b/volk/bootstrap index ff239c88c..a7fb78ac5 100755 --- a/volk/bootstrap +++ b/volk/bootstrap @@ -20,8 +20,10 @@ # Boston, MA 02110-1301, USA. rm -fr config.cache autom4te*.cache -cd include/volk && chmod +x volk_register.py && ./volk_register.py && cd ../.. 
-aclocal -I config +python -B gen/volk_register.py +mv gen/lib/Makefile.am lib/ + +aclocal -I config -I gen/config autoconf autoheader libtoolize --automake diff --git a/volk/config/Makefile.am b/volk/config/Makefile.am index 27e3f1296..d4786f83a 100644 --- a/volk/config/Makefile.am +++ b/volk/config/Makefile.am @@ -30,7 +30,6 @@ m4macros = \ ax_boost_base.m4 \ ax_boost_unit_test_framework.m4 \ bnv_have_qt.m4 \ - cppunit.m4 \ gr_lib64.m4 \ gr_libgnuradio_core_extra_ldflags.m4 \ gr_no_undefined.m4 \ @@ -43,7 +42,7 @@ m4macros = \ lf_cxx.m4 \ lf_warnings.m4 \ lf_x11.m4 \ - lv_set_simd_flags.m4 \ + $(top_gendir)/config/lv_set_simd_flags.m4 \ mkstemp.m4 \ onceonly.m4 \ pkg.m4 \ diff --git a/volk/config/cppunit.m4 b/volk/config/cppunit.m4 deleted file mode 100644 index 0991d51ec..000000000 --- a/volk/config/cppunit.m4 +++ /dev/null @@ -1,80 +0,0 @@ -dnl -dnl AM_PATH_CPPUNIT(MINIMUM-VERSION, [ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) -dnl -AC_DEFUN([AM_PATH_CPPUNIT], -[ - -AC_ARG_WITH(cppunit-prefix,[ --with-cppunit-prefix=PFX Prefix where CppUnit is installed (optional)], - cppunit_config_prefix="$withval", cppunit_config_prefix="") -AC_ARG_WITH(cppunit-exec-prefix,[ --with-cppunit-exec-prefix=PFX Exec prefix where CppUnit is installed (optional)], - cppunit_config_exec_prefix="$withval", cppunit_config_exec_prefix="") - - if test x$cppunit_config_exec_prefix != x ; then - cppunit_config_args="$cppunit_config_args --exec-prefix=$cppunit_config_exec_prefix" - if test x${CPPUNIT_CONFIG+set} != xset ; then - CPPUNIT_CONFIG=$cppunit_config_exec_prefix/bin/cppunit-config - fi - fi - if test x$cppunit_config_prefix != x ; then - cppunit_config_args="$cppunit_config_args --prefix=$cppunit_config_prefix" - if test x${CPPUNIT_CONFIG+set} != xset ; then - CPPUNIT_CONFIG=$cppunit_config_prefix/bin/cppunit-config - fi - fi - - AC_PATH_PROG(CPPUNIT_CONFIG, cppunit-config, no) - cppunit_version_min=$1 - - AC_MSG_CHECKING(for Cppunit - version >= $cppunit_version_min) - no_cppunit="" 
- if test "$CPPUNIT_CONFIG" = "no" ; then - no_cppunit=yes - else - CPPUNIT_CFLAGS=`$CPPUNIT_CONFIG --cflags` - CPPUNIT_LIBS=`$CPPUNIT_CONFIG --libs` - cppunit_version=`$CPPUNIT_CONFIG --version` - - cppunit_major_version=`echo $cppunit_version | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\1/'` - cppunit_minor_version=`echo $cppunit_version | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\2/'` - cppunit_micro_version=`echo $cppunit_version | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\3/'` - - cppunit_major_min=`echo $cppunit_version_min | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\1/'` - cppunit_minor_min=`echo $cppunit_version_min | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\2/'` - cppunit_micro_min=`echo $cppunit_version_min | \ - sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\3/'` - - cppunit_version_proper=`expr \ - $cppunit_major_version \> $cppunit_major_min \| \ - $cppunit_major_version \= $cppunit_major_min \& \ - $cppunit_minor_version \> $cppunit_minor_min \| \ - $cppunit_major_version \= $cppunit_major_min \& \ - $cppunit_minor_version \= $cppunit_minor_min \& \ - $cppunit_micro_version \>= $cppunit_micro_min ` - - if test "$cppunit_version_proper" = "1" ; then - AC_MSG_RESULT([$cppunit_major_version.$cppunit_minor_version.$cppunit_micro_version]) - else - AC_MSG_RESULT(no) - no_cppunit=yes - fi - fi - - if test "x$no_cppunit" = x ; then - ifelse([$2], , :, [$2]) - else - CPPUNIT_CFLAGS="" - CPPUNIT_LIBS="" - ifelse([$3], , :, [$3]) - fi - - AC_SUBST(CPPUNIT_CFLAGS) - AC_SUBST(CPPUNIT_LIBS) -]) - - - diff --git a/volk/config/lf_warnings.m4 b/volk/config/lf_warnings.m4 index d40c77f14..e62fb276c 100644 --- a/volk/config/lf_warnings.m4 +++ b/volk/config/lf_warnings.m4 @@ -29,7 +29,8 @@ dnl distribution terms that you use for the rest of that program. 
# ------------------------------------------------------------------------- AC_DEFUN([LF_CHECK_CXX_FLAG],[ - echo 'void f(){}' > conftest.cc + echo "#include <stdio.h> +int main(int argc, char **argv){return 0;}" > conftest.cc for i in $1 do AC_MSG_CHECKING([whether $CXX accepts $i]) @@ -54,7 +55,8 @@ AC_DEFUN([LF_CHECK_CXX_FLAG],[ # ------------------------------------------------------------------------- AC_DEFUN([LF_CHECK_CC_FLAG],[ - echo 'void f(){}' > conftest.c + echo "#include <stdio.h> +int main(int argc, char **argv){return 0;}" > conftest.c for i in $1 do AC_MSG_CHECKING([whether $CC accepts $i]) diff --git a/volk/config/lv_configure.m4 b/volk/config/lv_configure.m4 index dfa490cdf..358fba030 100755..100644 --- a/volk/config/lv_configure.m4 +++ b/volk/config/lv_configure.m4 @@ -109,14 +109,6 @@ dnl AM_CONDITIONAL([USE_PYTHON], [test "$with_python" = yes]) AC_CHECK_PROG([XMLTO],[xmlto],[yes],[]) AM_CONDITIONAL([HAS_XMLTO], [test x$XMLTO = xyes]) - dnl Define where to look for cppunit includes and libs - dnl sets CPPUNIT_CFLAGS and CPPUNIT_LIBS - dnl Try using pkg-config first, then fall back to cppunit-config. - PKG_CHECK_EXISTS(cppunit, - [PKG_CHECK_MODULES(CPPUNIT, cppunit >= 1.9.14)], - [AM_PATH_CPPUNIT([1.9.14],[], - [AC_MSG_ERROR([VOLK requires cppunit. 
Stop])])]) - dnl PKG_CHECK_MODULES(GNURADIO_CORE, gnuradio-core >= 3) dnl LIBS="$LIBS $GNURADIO_CORE_LIBS" ]) diff --git a/volk/configure.ac b/volk/configure.ac index c493adad6..fa3a90c7f 100644 --- a/volk/configure.ac +++ b/volk/configure.ac @@ -59,7 +59,6 @@ dnl AX_BOOST_TEST_EXEC_MONITOR AX_BOOST_UNIT_TEST_FRAMEWORK dnl AX_BOOST_WSERIALIZATION -AC_CONFIG_HEADERS([volk_config.h]) LV_SET_SIMD_FLAGS AC_CONFIG_FILES([\ diff --git a/volk/gen/.gitignore b/volk/gen/.gitignore new file mode 100644 index 000000000..a1c468f93 --- /dev/null +++ b/volk/gen/.gitignore @@ -0,0 +1,3 @@ +/config +/include +/lib diff --git a/volk/include/volk/archs.xml b/volk/gen/archs.xml index a19a5add9..977cc7924 100644 --- a/volk/include/volk/archs.xml +++ b/volk/gen/archs.xml @@ -5,12 +5,6 @@ <flag>none</flag> </arch> -<arch name="orc" type="all"> - <flag>lorc-0.4</flag> - <overrule>LV_HAVE_ORC</overrule> - <overrule_val>no</overrule_val> -</arch> - <arch name="altivec" type="powerpc"> <flag>maltivec</flag> </arch> @@ -83,6 +77,12 @@ <flag>msse2</flag> </arch> +<arch name="orc" type="all"> + <flag>lorc-0.4</flag> + <overrule>LV_HAVE_ORC</overrule> + <overrule_val>no</overrule_val> +</arch> + <arch name="sse3" type="x86"> <val>1</val> <op>1</op> @@ -126,7 +126,6 @@ <flag>msse4.2</flag> </arch> - <arch name="avx" type="x86"> <val>1</val> <op>1</op> @@ -135,5 +134,4 @@ <flag>mavx</flag> </arch> - </grammar> diff --git a/volk/gen/machines.xml b/volk/gen/machines.xml new file mode 100644 index 000000000..1f8b201ae --- /dev/null +++ b/volk/gen/machines.xml @@ -0,0 +1,59 @@ +<grammar> + +<machine name="generic"> +<archs>generic</archs> +</machine> + +<!-- +<machine name="mmx"> +<archs>generic 32|64 mmx</archs> +</machine> + +<machine name="sse"> +<archs>generic 32|64 mmx sse</archs> +</machine> +--> + +<!-- +Create an SSE2 only machine (without 64/32 inline assembly support). +This machine is intended to support the MSVC compiler on x86/amd64. 
+The MSVC compiler has intrinsic support for SSE and SSE2, +however it does not support the gcc style inline assembly. +--> +<machine name="sse2_only"> +<archs>generic mmx sse sse2</archs> +</machine> + +<machine name="sse2"> +<archs>generic 32|64 mmx sse sse2</archs> +</machine> + +<machine name="sse3"> +<archs>generic 32|64 mmx sse sse2 sse3</archs> +</machine> + +<machine name="ssse3"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3</archs> +</machine> + +<machine name="sse4_a"> +<archs>generic 32|64 mmx sse sse2 sse3 sse4_a popcount</archs> +</machine> + +<machine name="sse4_1"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1</archs> +</machine> + +<machine name="sse4_2"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount</archs> +</machine> + +<machine name="avx"> +<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx</archs> +</machine> + +<machine name="altivec"> +<archs>generic altivec</archs> +</machine> + +</grammar> diff --git a/volk/gen/make_c.py b/volk/gen/make_c.py new file mode 100644 index 000000000..e946152d0 --- /dev/null +++ b/volk/gen/make_c.py @@ -0,0 +1,84 @@ +# +# Copyright 2010-2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from volk_regexp import * +import string + +#ok todo list: +#put n_archs into the info struct so it doesn't have to be arch_defs[0]. 
+ +def make_c(machines, archs, functions, arched_arglist, my_arglist): + tempstring = r""" +// This file is automatically generated by make_c.py. +// Do not edit this file. +""" + tempstring += """ +#include <volk/volk_common.h> +#include "volk_machines.h" +#include <volk/volk_typedefs.h> +#include <volk/volk_cpu.h> +#include "volk_rank_archs.h" +#include <volk/volk.h> +#include <stdio.h> +#include <string.h> + +""" + +#OK here's the deal. the .h prototypes the functions. the .c impls them as fptrs, can use p_whatever. +#also .c impls the get_machine call +#also .c impls the default call for each fn + +#here do static fn get arch + tempstring += r""" +struct volk_machine *get_machine(void) { + extern struct volk_machine *volk_machines[]; + extern unsigned int n_volk_machines; + static struct volk_machine *machine = NULL; + + if(machine != NULL) return machine; + else { + unsigned int max_score = 0; + int i; + for(i=0; i<n_volk_machines; i++) { + if(!(volk_machines[i]->caps & (~volk_get_lvarch()))) { + if(volk_machines[i]->caps > max_score) { + max_score = volk_machines[i]->caps; + machine = volk_machines[i]; + } + } + } + printf("Using Volk machine: %s\n", machine->name); + return machine; + } +} + +""" + + for i in range(len(functions)): + tempstring += "void get_" + functions[i] + replace_arch.sub("", arched_arglist[i]) + "\n" + tempstring += " %s = get_machine()->%s_archs[volk_rank_archs(get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs, get_machine()->%s_name, volk_get_lvarch())];\n" % (functions[i], functions[i], functions[i], functions[i], functions[i], functions[i]) + tempstring += " %s(%s);\n}\n\n" % (functions[i], my_arglist[i]) + tempstring += replace_volk.sub("p", functions[i]) + " " + functions[i] + " = &get_" + functions[i] + ";\n\n" + tempstring += "void %s_manual%s\n" % (functions[i], arched_arglist[i]) + tempstring += " get_machine()->%s_archs[get_index(get_machine()->%s_indices, get_machine()->%s_n_archs, 
arch)](%s);\n}\n" % (functions[i], functions[i], functions[i], my_arglist[i]) + tempstring += "struct volk_func_desc %s_get_func_desc(void) {\n" % (functions[i]) + tempstring += " struct volk_func_desc desc = {get_machine()->%s_indices, get_machine()->%s_arch_defs, get_machine()->%s_n_archs};\n" % (functions[i], functions[i], functions[i]) + tempstring += " return desc;\n}\n" + + return tempstring + + diff --git a/volk/include/volk/make_config_fixed.py b/volk/gen/make_config_fixed.py index 3fd1bdf0a..3fd1bdf0a 100644 --- a/volk/include/volk/make_config_fixed.py +++ b/volk/gen/make_config_fixed.py diff --git a/volk/include/volk/make_config_in.py b/volk/gen/make_config_in.py index d29680af2..d29680af2 100644 --- a/volk/include/volk/make_config_in.py +++ b/volk/gen/make_config_in.py diff --git a/volk/gen/make_cpuid_c.py b/volk/gen/make_cpuid_c.py new file mode 100644 index 000000000..2fdbaf304 --- /dev/null +++ b/volk/gen/make_cpuid_c.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. 
+# + +from xml.dom import minidom + +HEADER_TEMPL = """\ +/*this file is auto_generated by volk_register.py*/ + +#include <volk/volk_cpu.h> +#include <volk/volk_config_fixed.h> + +struct VOLK_CPU volk_cpu; + +#if defined(__i386__) || (__x86_64__) + +//implement get cpuid for gcc compilers using a copy of cpuid.h +#if defined(__GNUC__) +#include <gcc_x86_cpuid.h> +#define cpuid_x86(op, r) __get_cpuid(op, r+0, r+1, r+2, r+3) + +//implement get cpuid for MSVC compilers using __cpuid intrinsic +#elif defined(_MSC_VER) +#include <intrin.h> +#define cpuid(op, r) __cpuid(r, op) + +#else +#error "A get cpuid for volk is not available on this compiler..." +#endif + +static inline unsigned int cpuid_eax(unsigned int op) { + unsigned int regs[4]; + cpuid_x86 (op, regs); + return regs[0]; +} + +static inline unsigned int cpuid_ebx(unsigned int op) { + unsigned int regs[4]; + cpuid_x86 (op, regs); + return regs[1]; +} + +static inline unsigned int cpuid_ecx(unsigned int op) { + unsigned int regs[4]; + cpuid_x86 (op, regs); + return regs[2]; +} + +static inline unsigned int cpuid_edx(unsigned int op) { + unsigned int regs[4]; + cpuid_x86 (op, regs); + return regs[3]; +} +#endif + +""" + +def make_cpuid_c(dom) : + tempstring = HEADER_TEMPL; + + for domarch in dom: + if str(domarch.attributes["type"].value) == "x86": + if "no_test" in domarch.attributes.keys(): + no_test = str(domarch.attributes["no_test"].value); + if no_test == "true": + no_test = True; + else: + no_test = False; + else: + no_test = False; + arch = str(domarch.attributes["name"].value); + op = domarch.getElementsByTagName("op"); + if op: + op = str(op[0].firstChild.data); + reg = domarch.getElementsByTagName("reg"); + if reg: + reg = str(reg[0].firstChild.data); + shift = domarch.getElementsByTagName("shift"); + if shift: + shift = str(shift[0].firstChild.data); + val = domarch.getElementsByTagName("val"); + if val: + val = str(val[0].firstChild.data); + + if no_test: + tempstring = tempstring + """\ +int 
i_can_has_%s () { +#if defined(__i386__) || (__x86_64__) + return 1; +#else + return 0; +#endif +} + +""" % (arch) + + elif op == "1": + tempstring = tempstring + """\ +int i_can_has_%s () { +#if defined(__i386__) || (__x86_64__) + unsigned int e%sx = cpuid_e%sx (%s); + return ((e%sx >> %s) & 1) == %s; +#else + return 0; +#endif +} + +""" % (arch, reg, reg, op, reg, shift, val) + + elif op == "0x80000001": + tempstring = tempstring + """\ +int i_can_has_%s () { +#if defined(__i386__) || (__x86_64__) + unsigned int extended_fct_count = cpuid_eax(0x80000000); + if (extended_fct_count < 0x80000001) + return %s^1; + unsigned int extended_features = cpuid_e%sx (%s); + return ((extended_features >> %s) & 1) == %s; +#else + return 0; +#endif +} + +""" % (arch, val, reg, op, shift, val) + + elif str(domarch.attributes["type"].value) == "powerpc": + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + """\ +int i_can_has_%s () { +#ifdef __PPC__ + return 1; +#else + return 0; +#endif +} + +""" % (arch) + + elif str(domarch.attributes["type"].value) == "all": + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + """\ +int i_can_has_%s () { + return 1; +} + +""" % (arch) + else: + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + """\ +int i_can_has_%s () { + return 0; +} + +""" % (arch) + + tempstring = tempstring + "void volk_cpu_init() {\n"; + for domarch in dom: + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + " volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" + tempstring = tempstring + "}\n\n" + + tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; + tempstring = tempstring + " unsigned int retval = 0;\n" + tempstring = tempstring + " volk_cpu_init();\n" + for domarch in dom: + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" + tempstring = tempstring + " return 
retval;\n" + tempstring = tempstring + "}\n\n" + + return tempstring; + + + + + + + diff --git a/volk/include/volk/make_cpuid_h.py b/volk/gen/make_cpuid_h.py index cd3da2455..4fe5c4e07 100644 --- a/volk/include/volk/make_cpuid_h.py +++ b/volk/gen/make_cpuid_h.py @@ -21,14 +21,14 @@ # from xml.dom import minidom -from emit_omnilog import * def make_cpuid_h(dom) : tempstring = ""; tempstring = tempstring +'/*this file is auto generated by volk_register.py*/'; tempstring = tempstring +'\n#ifndef INCLUDED_VOLK_CPU_H'; tempstring = tempstring +'\n#define INCLUDED_VOLK_CPU_H\n\n'; - tempstring = tempstring + emit_prolog(); + tempstring = tempstring + "#include <volk/volk_common.h>\n\n"; + tempstring = tempstring + '__VOLK_DECL_BEGIN\n'; tempstring = tempstring + '\n' tempstring = tempstring + "struct VOLK_CPU {\n" @@ -42,7 +42,7 @@ def make_cpuid_h(dom) : tempstring = tempstring + "unsigned int volk_get_lvarch ();\n" tempstring = tempstring + "\n"; - tempstring = tempstring + emit_epilog(); + tempstring = tempstring + "__VOLK_DECL_END\n"; tempstring = tempstring + "#endif /*INCLUDED_VOLK_CPU_H*/\n" return tempstring; diff --git a/volk/gen/make_each_machine_c.py b/volk/gen/make_each_machine_c.py new file mode 100644 index 000000000..d4f5f01de --- /dev/null +++ b/volk/gen/make_each_machine_c.py @@ -0,0 +1,86 @@ +# +# Copyright 2010-2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# + +from volk_regexp import * +import string + +def _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist): + + #make the machine fcountlist and taglist a subset given the archs list + machine_fcountlists = list() + machine_taglists = list() + for i in range(len(fcountlist)): + machine_fcountlist = list() + machine_taglist = list() + for j in range(len(fcountlist[i])): + if len(set(archs).intersection(map(str.lower, fcountlist[i][j]))) == len(fcountlist[i][j]): + machine_fcountlist.append(fcountlist[i][j]) + machine_taglist.append(taglist[i][j]) + machine_fcountlists.append(machine_fcountlist) + machine_taglists.append(machine_taglist) + + #create the volk machine struct for this machine file + tempstring = "" + tempstring += "struct volk_machine volk_machine_" + machine_name + " = {\n" + tempstring += " " + ' | '.join(["(1 << LV_" + arch.swapcase() + ")" for arch in archs]) + ",\n" + tempstring += " \"%s\",\n"%machine_name + + #fill in the description for each function + for i in range(len(functions)): + tempstring += " \"%s\",\n"%functions[i] + tempstring += " {%s},\n"%(', '.join(['"%s"'%tag for tag in machine_taglists[i]])) + tempstring += " {%s},\n"%(', '.join([' | '.join(['(1 << LV_%s)'%fc for fc in fcount]) for fcount in machine_fcountlists[i]])) + tempstring += " {%s},\n"%(', '.join(['%s_%s'%(functions[i], tag) for tag in machine_taglists[i]])) + tempstring += " %d,\n"%len(machine_taglists[i]) + + tempstring = strip_trailing(tempstring, ",") + tempstring += "};\n" + return tempstring + +def make_each_machine_c(machine_name, archs, functions, fcountlist, taglist): + + tempstring = r""" +// This file is automatically generated by make_each_machine_c.py. +// Do not edit this file. 
+""" + for arch in archs: + tempstring += "#define LV_HAVE_" + arch.swapcase() + " 1\n" + + tempstring += """ +#include <volk/volk_common.h> +#include "volk_machines.h" +#include <volk/volk_config_fixed.h> + +""" + for func in functions: + tempstring += "#include <volk/" + func + ".h>\n" + tempstring += "\n\n" + + tempstring += """ +#ifdef LV_HAVE_ORC +%s +#else +%s +#endif +"""%( + _make_each_machine_struct(machine_name, archs+["orc"], functions, fcountlist, taglist), + _make_each_machine_struct(machine_name, archs, functions, fcountlist, taglist) +) + + return tempstring + + diff --git a/volk/include/volk/make_environment_init_c.py b/volk/gen/make_environment_init_c.py index e06c7f246..263d5bcd1 100644 --- a/volk/include/volk/make_environment_init_c.py +++ b/volk/gen/make_environment_init_c.py @@ -4,13 +4,12 @@ def make_environment_init_c(dom) : tempstring = ""; tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; tempstring = tempstring + "#include<volk/volk_environment_init.h>\n" - tempstring = tempstring + "#include<volk/volk_config.h>\n" for domarch in dom: arch = str(domarch.attributes["name"].value); incs = domarch.getElementsByTagName("include"); for inc in incs: my_inc = str(inc.firstChild.data); - tempstring = tempstring + "#if LV_HAVE_" + arch.swapcase() + "\n"; + tempstring = tempstring + "#ifdef LV_HAVE_" + arch.swapcase() + "\n"; tempstring = tempstring + "#include<" + my_inc + ">\n"; tempstring = tempstring + "#endif\n" tempstring = tempstring + '\n\n'; @@ -21,7 +20,7 @@ def make_environment_init_c(dom) : envs = domarch.getElementsByTagName("environment"); for env in envs: cmd = str(env.firstChild.data); - tempstring = tempstring + "#if LV_HAVE_" + arch.swapcase() + "\n"; + tempstring = tempstring + "#ifdef LV_HAVE_" + arch.swapcase() + "\n"; tempstring = tempstring + " " + cmd + "\n"; tempstring = tempstring + "#endif\n" diff --git a/volk/include/volk/make_environment_init_h.py b/volk/gen/make_environment_init_h.py 
index 77a841a24..655d73f54 100644 --- a/volk/include/volk/make_environment_init_h.py +++ b/volk/gen/make_environment_init_h.py @@ -1,5 +1,4 @@ from xml.dom import minidom -from emit_omnilog import * def make_environment_init_h() : tempstring = ""; @@ -7,9 +6,10 @@ def make_environment_init_h() : tempstring = tempstring + "#ifndef INCLUDE_LIBVECTOR_ENVIRONMENT_INIT_H\n"; tempstring = tempstring + "#define INCLUDE_LIBVECTOR_ENVIRONMENT_INIT_H\n"; tempstring = tempstring + "\n"; - tempstring = tempstring + emit_prolog(); + tempstring = tempstring + "#include <volk/volk_common.h>\n\n"; + tempstring = tempstring + "__VOLK_DECL_BEGIN\n"; tempstring = tempstring + "void volk_environment_init();\n"; - tempstring = tempstring + emit_epilog(); + tempstring = tempstring + "__VOLK_DECL_END\n"; tempstring = tempstring + "#endif\n" return tempstring; diff --git a/volk/gen/make_h.py b/volk/gen/make_h.py new file mode 100644 index 000000000..3d5790de4 --- /dev/null +++ b/volk/gen/make_h.py @@ -0,0 +1,35 @@ +from xml.dom import minidom +from volk_regexp import * + +def make_h(funclist, arched_arglist) : + tempstring = ""; + tempstring = tempstring + '/*this file is auto generated by make_h.py*/\n'; + + tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_RUNTIME'; + tempstring = tempstring + '\n#define INCLUDED_VOLK_RUNTIME'; + tempstring = tempstring + '\n\n#include<volk/volk_typedefs.h>\n'; + tempstring = tempstring + '#include<volk/volk_config_fixed.h>\n'; + tempstring = tempstring + '#include<volk/volk_common.h>\n'; + tempstring = tempstring + '#include<volk/volk_complex.h>\n'; + tempstring = tempstring + '__VOLK_DECL_BEGIN\n'; + + tempstring = tempstring + '\n'; + + tempstring += """ +struct volk_func_desc { + const char **indices; + const int *arch_defs; + const int n_archs; +}; +""" + for i in range(len(funclist)): + tempstring += "extern " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + ";\n" + tempstring += "extern VOLK_API void %s_manual%s;\n" % (funclist[i], 
arched_arglist[i]) + tempstring = strip_trailing(tempstring, " {") + tempstring += "extern VOLK_API struct volk_func_desc %s_get_func_desc(void);\n" % (funclist[i]) + + tempstring = tempstring + '__VOLK_DECL_END\n'; + tempstring = tempstring + "#endif /*INCLUDED_VOLK_RUNTIME*/\n"; + + return tempstring; + diff --git a/volk/gen/make_machines_c.py b/volk/gen/make_machines_c.py new file mode 100644 index 000000000..a7ab63d6e --- /dev/null +++ b/volk/gen/make_machines_c.py @@ -0,0 +1,41 @@ +# +# Copyright 2010-2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from volk_regexp import * + +def make_machines_c(machines): + tempstring = r""" +// This file is automatically generated by make_machines_c.py. +// Do not edit this file. 
+ +#include <volk/volk_common.h> +#include <volk/volk_typedefs.h> +#include "volk_machines.h" + +struct volk_machine *volk_machines[] = { +""" + for machine in machines: + tempstring += """#if LV_MACHINE_""" + machine.swapcase() + "\n" + tempstring += "&volk_machine_" + machine + tempstring += "," + tempstring += "\n#endif\n" + + tempstring += r""" +}; +unsigned int n_volk_machines = sizeof(volk_machines)/sizeof(*volk_machines); +""" + return tempstring diff --git a/volk/gen/make_machines_h.py b/volk/gen/make_machines_h.py new file mode 100644 index 000000000..563de18a6 --- /dev/null +++ b/volk/gen/make_machines_h.py @@ -0,0 +1,58 @@ +# +# Copyright 2010-2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from volk_regexp import * + +def make_machines_h(functions, machines, archs): + tempstring = r""" +// This file is automatically generated by make_machines_h.py. +// Do not edit this file. 
+ +#ifndef INCLUDED_LIBVOLK_MACHINES_H +#define INCLUDED_LIBVOLK_MACHINES_H + +#include <volk/volk_common.h> +#include <volk/volk_typedefs.h> + +__VOLK_DECL_BEGIN + +struct volk_machine { + const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format) + const char *name; +""" + for function in functions: + tempstring += " const char *%s_name;\n"%function + tempstring += " const char *%s_indices[%d];\n"%(function, len(archs)) + tempstring += " const int %s_arch_defs[%d];\n"%(function, len(archs)) + tempstring += " const %s %s_archs[%d];\n"%(replace_volk.sub("p", function), function, len(archs)) + tempstring += " const int %s_n_archs;\n"%function + + tempstring += r"""}; + +""" + for machine in machines: + tempstring += """#if LV_MACHINE_""" + machine.swapcase() + "\n" + tempstring += "extern struct volk_machine volk_machine_" + machine + ";\n" + tempstring += """#endif\n""" + + tempstring += r""" + +__VOLK_DECL_END + +#endif //INCLUDED_LIBVOLK_MACHINES_H""" + + return tempstring diff --git a/volk/gen/make_makefile_am.py b/volk/gen/make_makefile_am.py new file mode 100644 index 000000000..d700626bb --- /dev/null +++ b/volk/gen/make_makefile_am.py @@ -0,0 +1,123 @@ +# +# Copyright 2010-2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +from xml.dom import minidom + +def make_makefile_am(dom, machines, archflags_dict): + tempstring = r""" +# This file is automatically generated by make_makefile_am.py. +# Do not edit this file. + +include $(top_srcdir)/Makefile.common + +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ + -I$(top_srcdir)/include \ + -I$(top_gendir)/include \ + -Dvolk_EXPORTS \ + -fvisibility=hidden \ + $(WITH_INCLUDES) + +lib_LTLIBRARIES = \ + libvolk.la + +EXTRA_DIST = \ + volk_rank_archs.h \ + gcc_x86_cpuid.h + +# ---------------------------------------------------------------- +# The main library +# ---------------------------------------------------------------- + +libvolk_la_SOURCES = \ + $(platform_CODE) \ + $(top_gendir)/lib/volk.c \ + $(top_gendir)/lib/volk_cpu.c \ + volk_rank_archs.c \ + volk_prefs.c \ + $(top_gendir)/lib/volk_machines.c + +if LV_HAVE_ORC +volk_orc_CFLAGS = -DLV_HAVE_ORC=1 +volk_orc_LDFLAGS = $(ORC_LDFLAGS) -lorc-0.4 +volk_orc_LIBADD = ../orc/libvolk_orc.la +else +volk_orc_CFLAGS = +volk_orc_LDFLAGS = +volk_orc_LIBADD = +endif + +libvolk_la_CPPFLAGS = $(AM_CPPFLAGS) $(volk_orc_CFLAGS) +libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) +libvolk_la_LIBADD = $(volk_orc_LIBADD) + +noinst_LTLIBRARIES = + +""" + + #here be dragons + for machine_name in machines: + tempstring += "if LV_MACHINE_" + machine_name.swapcase() + "\n" + tempstring += "libvolk_" + machine_name + "_la_SOURCES = $(top_gendir)/lib/volk_machine_" + machine_name + ".c\n" + tempstring += "libvolk_" + machine_name + "_la_CPPFLAGS = -I$(top_srcdir)/include -I$(top_gendir)/include $(volk_orc_CFLAGS) " + for arch in machines[machine_name]: + if archflags_dict[arch] != "none": + tempstring += "-" + archflags_dict[arch] + " " + + tempstring += "\nnoinst_LTLIBRARIES += libvolk_" + machine_name + ".la " + tempstring += "\nlibvolk_la_LIBADD += libvolk_" + machine_name + ".la\n" + tempstring += "libvolk_la_CPPFLAGS += -DLV_MACHINE_" + machine_name.swapcase() + " \n" + 
tempstring += "endif\n" + + + tempstring += r""" + +# ---------------------------------------------------------------- +# The QA library. Note libvolk.la in LIBADD +# ---------------------------------------------------------------- +#libvolk_qa_la_SOURCES = \ +# qa_utils.cc + +#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost + +#libvolk_qa_la_LIBADD = \ +# libvolk.la \ +# libvolk_runtime.la + +# ---------------------------------------------------------------- +# headers that don't get installed +# ---------------------------------------------------------------- +noinst_HEADERS = \ + $(top_gendir)/lib/volk_init.h \ + $(top_gendir)/lib/volk_machines.h \ + $(top_gendir)/lib/volk_environment_init.h \ + qa_utils.h + +# ---------------------------------------------------------------- +# Our test program +# ---------------------------------------------------------------- +noinst_PROGRAMS = \ + testqa + +testqa_SOURCES = testqa.cc qa_utils.cc +testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) +testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB) +testqa_LDADD = \ + libvolk.la +""" + + + return tempstring diff --git a/volk/include/volk/make_proccpu_sim.py b/volk/gen/make_proccpu_sim.py index 029dacfcc..029dacfcc 100644 --- a/volk/include/volk/make_proccpu_sim.py +++ b/volk/gen/make_proccpu_sim.py diff --git a/volk/gen/make_set_simd.py b/volk/gen/make_set_simd.py new file mode 100644 index 000000000..5a848e59e --- /dev/null +++ b/volk/gen/make_set_simd.py @@ -0,0 +1,166 @@ +# +# Copyright 2010 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +from xml.dom import minidom + +def make_set_simd(dom, machines) : + tempstring = ""; + tempstring = tempstring +'dnl this file is auto generated by volk_register.py\n\n'; + + tempstring = tempstring +'\ndnl define arch checks\n'; + for domarch in dom: + if str(domarch.attributes["type"].value) != "all": + arch = str(domarch.attributes["name"].value); + flag = domarch.getElementsByTagName("flag"); + flag = str(flag[0].firstChild.data); + tempstring = tempstring + "AC_DEFUN([_TRY_ADD_" + arch.swapcase() + "],\n"; + tempstring = tempstring + "[\n"; + tempstring = tempstring + " LF_CHECK_CC_FLAG([-" + flag + "])\n"; + tempstring = tempstring + " LF_CHECK_CXX_FLAG([-" + flag + "])\n"; + tempstring = tempstring + "])\n"; + + tempstring = tempstring +'\ndnl main set_simd_flags\n'; + tempstring = tempstring + "AC_DEFUN([LV_SET_SIMD_FLAGS],\n"; + tempstring = tempstring + "[\n"; + #tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; + tempstring = tempstring + " AC_SUBST(LV_CXXFLAGS)\n"; + tempstring = tempstring + " indCC=no\n"; + tempstring = tempstring + " indCXX=no\n"; + tempstring = tempstring + " indLV_ARCH=no\n"; + tempstring = tempstring + " AC_ARG_WITH(lv_arch,\n"; + tempstring = tempstring + " AC_HELP_STRING([--with-lv_arch=ARCH],[set volk hardware speedups as space separated string with elements from the following list("; + + for domarch in dom: + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + arch + ", " + tempstring = tempstring[0:len(tempstring) - 2]; + + tempstring = tempstring + ")]),\n"; + tempstring = tempstring + " 
[cf_with_lv_arch=\"$withval\"],\n"; + tempstring = tempstring + " [cf_with_lv_arch=\"\"])\n"; + if str(domarch.attributes["type"].value) == "all": + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + " AC_DEFINE(LV_MAKE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; + tempstring = tempstring + " ADDONS=\"\"\n"; + tempstring = tempstring + " BUILT_ARCHS=\"\"\n"; + #tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; + tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; + tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; + tempstring = tempstring + " cf_with_lv_arch=\""; + for domarch in dom: + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + arch + " "; + tempstring = tempstring[0:-1] + "\"\n"; + tempstring = tempstring + " OVERRULE_FLAG=\"yes\"\n"; + tempstring = tempstring + " fi\n"; + + tempstring = tempstring +'\ndnl init LV_MAKE_XXX and then try to add archs\n'; + for domarch in dom: + if str(domarch.attributes["type"].value) != "all": + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + " LV_MAKE_" + arch.swapcase() + "=no\n"; + + for domarch in dom: + arch = str(domarch.attributes["name"].value); + atype = str(domarch.attributes["type"].value); + if atype != "all": + tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; + + for domarch in dom: + arch = str(domarch.attributes["name"].value); + atype = str(domarch.attributes["type"].value); + tempstring = tempstring +'\ndnl add in flags for arch ' + arch + '\n'; + overrule = domarch.getElementsByTagName("overrule"); + if overrule: + overrule = str(overrule[0].firstChild.data); + else: + overrule = ""; + overrule_val = domarch.getElementsByTagName("overrule_val"); + if overrule_val: + overrule_val = str(overrule_val[0].firstChild.data); + else: + overrule_val = ""; + flag = domarch.getElementsByTagName("flag"); + flag = str(flag[0].firstChild.data); + if atype != "all": + tempstring = tempstring + " 
for i in $lf_CXXFLAGS\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; + tempstring = tempstring + " indCXX=yes\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " for i in $lf_CFLAGS\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; + tempstring = tempstring + " indCC=yes\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + + tempstring = tempstring + " if test \"$indCC\" == \"yes\" && test \"$indCXX\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + + #tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " LV_MAKE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indCC=no\n" + tempstring = tempstring + " indCXX=no\n" + tempstring = tempstring + " indLV_ARCH=no\n" + else: + tempstring = tempstring + " for i in $cf_with_lv_arch\n" + tempstring = tempstring + " do\n" + tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; + tempstring = tempstring + " indLV_ARCH=yes\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " done\n" + tempstring = tempstring + " if test -n \"" + overrule + "\" && test 
\"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " indLV_ARCH=no\n" + tempstring = tempstring + " fi\n" + tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" + tempstring = tempstring + " LV_MAKE_" + arch.swapcase() + "=yes\n"; + tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; + tempstring = tempstring + " fi\n" + tempstring = tempstring + " indLV_ARCH=no\n" + + + for domarch in dom: + arch = str(domarch.attributes["name"].value); + tempstring = tempstring + " AM_CONDITIONAL(LV_MAKE_" + arch.swapcase() + ", test \"$LV_MAKE_" + arch.swapcase() + "\" == \"yes\")\n"; + + tempstring += "\n" + #now we can define the machines we're compiling + for machine_name in machines: + tempstring += " AM_CONDITIONAL(LV_MACHINE_" + machine_name.swapcase() + ", " + marchlist = machines[machine_name] + for march in marchlist: + tempstring += "test \"$LV_MAKE_" + march.swapcase() + "\" == \"yes\" && " + + tempstring += "test true)\n" #just so we don't have to detect the last one in the group, i know + tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" + tempstring = tempstring + "])\n" + + return tempstring; + + diff --git a/volk/include/volk/make_typedefs.py b/volk/gen/make_typedefs.py index fe81cb2b0..8f9f2b55e 100644 --- a/volk/include/volk/make_typedefs.py +++ b/volk/gen/make_typedefs.py @@ -16,7 +16,7 @@ def make_typedefs(funclist, retlist, my_argtypelist) : tempstring = tempstring + '\n'; for i in range(len(funclist)): - tempstring = tempstring + "typedef " + retlist[i] +" (*" + replace_volk.sub("p", funclist[i]) + ")(" + my_argtypelist[i] + ");\n\n"; + tempstring = tempstring + "typedef " + retlist[i] +" (*" + replace_volk.sub("p", funclist[i]) + ")(" + my_argtypelist[i] + ");\n"; tempstring = tempstring + "#endif /*INCLUDED_VOLK_TYPEDEFS*/\n"; diff --git a/volk/include/volk/volk_regexp.py 
b/volk/gen/volk_regexp.py index 7b695cb3b..b83ce5206 100644 --- a/volk/include/volk/volk_regexp.py +++ b/volk/gen/volk_regexp.py @@ -1,4 +1,5 @@ import re +import string remove_after_underscore = re.compile("_.*"); space_remove = re.compile(" "); @@ -6,3 +7,8 @@ leading_space_remove = re.compile("^ *"); replace_arch = re.compile(", const char\* arch"); replace_bracket = re.compile(" {"); replace_volk = re.compile("volk"); + +def strip_trailing(tostrip, stripstr): + lindex = tostrip.rfind(stripstr) + tostrip = tostrip[0:lindex] + string.replace(tostrip[lindex:len(tostrip)], stripstr, ""); + return tostrip diff --git a/volk/include/volk/volk_register.py b/volk/gen/volk_register.py index bc8f959af..62ebba3c2 100755..100644 --- a/volk/include/volk/volk_register.py +++ b/volk/gen/volk_register.py @@ -1,50 +1,54 @@ #! /usr/bin/env python import sys +import os import re import string from xml.dom import minidom from volk_regexp import * -from make_cpuid_x86_c import make_cpuid_x86_c +from make_cpuid_c import make_cpuid_c from make_cpuid_h import make_cpuid_h -from make_proccpu_sim import make_proccpu_sim from make_set_simd import make_set_simd -from make_cpuid_generic_c import make_cpuid_generic_c -from make_cpuid_powerpc_c import make_cpuid_powerpc_c -from make_registry import make_registry -from make_h import make_h -from make_init_h import make_init_h from make_config_fixed import make_config_fixed -from make_config_in import make_config_in -from make_c import make_c -from make_runtime_c import make_runtime_c -from make_init_c import make_init_c -from make_runtime import make_runtime from make_typedefs import make_typedefs -from make_mktables import make_mktables from make_environment_init_c import make_environment_init_c from make_environment_init_h import make_environment_init_h - -outfile_set_simd = open("../../config/lv_set_simd_flags.m4", "w"); -outfile_reg = open("volk_registry.h", "w"); -outfile_h = open("volk.h", "w"); -outfile_c = open("../../lib/volk.c", 
"w"); -outfile_runtime = open("volk_runtime.h", "w"); -outfile_runtime_c = open("../../lib/volk_runtime.c", "w"); -outfile_typedefs = open("volk_typedefs.h", "w"); -outfile_init_h = open("../../lib/volk_init.h", "w"); -outfile_init_c = open("../../lib/volk_init.c", "w"); -outfile_cpu_h = open("volk_cpu.h", "w"); -outfile_cpu_x86_c = open("../../lib/volk_cpu_x86.c", "w"); -outfile_cpu_generic_c = open("../../lib/volk_cpu_generic.c", "w"); -outfile_cpu_powerpc_c = open("../../lib/volk_cpu_powerpc.c", "w"); -outfile_proccpu_sim = open("../../lib/volk_proccpu_sim.c", "w"); -outfile_config_in = open("../../volk_config.h.in", "w"); -outfile_config_fixed = open("volk_config_fixed.h", "w"); -outfile_mktables = open("../../lib/volk_mktables.c", "w"); -outfile_environment_c = open("../../lib/volk_environment_init.c", "w"); -outfile_environment_h = open("volk_environment_init.h", "w"); -infile = open("Makefile.am", "r"); +from make_makefile_am import make_makefile_am +from make_machines_h import make_machines_h +from make_machines_c import make_machines_c +from make_each_machine_c import make_each_machine_c +from make_c import make_c +from make_h import make_h +import copy + +#set srcdir and gendir +srcdir = os.path.dirname(os.path.dirname(__file__)) +try: gendir = sys.argv[1] +except: gendir = os.path.dirname(__file__) + +#ensure directories exist +for dir in ( + (os.path.join(gendir, 'include', 'volk')), + (os.path.join(gendir, 'lib')), + (os.path.join(gendir, 'config')) +): + if not os.path.exists(dir): os.makedirs(dir) + +outfile_set_simd = open(os.path.join(gendir, "config/lv_set_simd_flags.m4"), "w") +outfile_h = open(os.path.join(gendir, "include/volk/volk.h"), "w") +outfile_c = open(os.path.join(gendir, "lib/volk.c"), "w") +outfile_typedefs = open(os.path.join(gendir, "include/volk/volk_typedefs.h"), "w") +outfile_init_h = open(os.path.join(gendir, "lib/volk_init.h"), "w") +outfile_cpu_h = open(os.path.join(gendir, "include/volk/volk_cpu.h"), "w") +outfile_cpu_c = 
open(os.path.join(gendir, "lib/volk_cpu.c"), "w") +#outfile_config_in = open(os.path.join(gendir, "include/volk/volk_config.h.in"), "w") +outfile_config_fixed = open(os.path.join(gendir, "include/volk/volk_config_fixed.h"), "w") +outfile_environment_c = open(os.path.join(gendir, "lib/volk_environment_init.c"), "w") +outfile_environment_h = open(os.path.join(gendir, "lib/volk_environment_init.h"), "w") +outfile_makefile_am = open(os.path.join(gendir, "lib/Makefile.am"), "w") +outfile_machines_h = open(os.path.join(gendir, "lib/volk_machines.h"), "w") +outfile_machines_c = open(os.path.join(gendir, "lib/volk_machines.c"), "w") +infile = open(os.path.join(srcdir, "include/volk/Makefile.am"), "r") mfile = infile.readlines(); @@ -77,7 +81,7 @@ for line in mfile: functions.append(subsubline.group(0)); archs = []; -afile = minidom.parse("archs.xml"); +afile = minidom.parse(os.path.join(srcdir, "gen/archs.xml")) filearchs = afile.getElementsByTagName("arch"); for filearch in filearchs: archs.append(str(filearch.attributes["name"].value)); @@ -86,8 +90,12 @@ for arch in archs: a_var = re.search("^\$", arch); if a_var: archs.remove(arch); + + - +archflags_dict = {} +for filearch in filearchs: + archflags_dict[str(filearch.attributes["name"].value)] = str(filearch.getElementsByTagName("flag")[0].firstChild.data) archs_or = "(" for arch in archs: @@ -95,7 +103,41 @@ for arch in archs: archs_or = archs_or[0:len(archs_or)-1]; archs_or = archs_or + ")"; - +#get machine list and parse to a list of machines, each with a list of archs (none of this DOM crap) +machine_str_dict = {} +mfile = minidom.parse(os.path.join(srcdir, "gen/machines.xml")) +filemachines = mfile.getElementsByTagName("machine") + +for filemachine in filemachines: + machine_str_dict[str(filemachine.attributes["name"].value)] = str(filemachine.getElementsByTagName("archs")[0].firstChild.data).split() + +#all right now you have a dict of arch lists +#next we expand it +#this is an expanded list accounting for the OR 
syntax +#TODO: make this work for multiple "|" machines +machines = {} +already_done = False +for machine_name in machine_str_dict: + already_done = False + marchlist = machine_str_dict[machine_name] + for march in marchlist: + or_marchs = march.split("|") + if len(or_marchs) > 1: + marchlist.remove(march) + for or_march in or_marchs: + tempmarchlist = copy.deepcopy(marchlist) + tempmarchlist.append(or_march) + machines[machine_name + "_" + or_march] = tempmarchlist + already_done = True + + if not already_done: + machines[machine_name] = marchlist + +#for machine_name in machines: +# print machine_name + ": " + str(machines[machine_name]) + +#ok, now we have all the machines we're going to build. next step is to generate a Makefile.am where they're all laid out and compiled + taglist = []; fcountlist = []; arched_arglist = []; @@ -105,13 +147,13 @@ my_argtypelist = []; for func in functions: tags = []; fcount = []; - infile_source = open(func + ".h"); + infile_source = open(os.path.join(srcdir, 'include', 'volk', func + ".h")) begun_name = 0; begun_paren = 0; sourcefile = infile_source.readlines(); infile_source.close(); for line in sourcefile: - +#FIXME: make it work for multiple #if define()s archline = re.search("^\#if.*?LV_HAVE_" + archs_or + ".*", line); if archline: arch = archline.group(0); @@ -219,60 +261,39 @@ for func in functions: fcountlist.append(fcount); taglist.append(tags); -outfile_mktables.write(make_mktables(functions)); -outfile_mktables.close(); - outfile_cpu_h.write(make_cpuid_h(filearchs)); outfile_cpu_h.close(); -outfile_cpu_x86_c.write(make_cpuid_x86_c(filearchs)); -outfile_cpu_x86_c.close(); - -outfile_proccpu_sim.write(make_proccpu_sim(filearchs)); -outfile_proccpu_sim.close(); +outfile_cpu_c.write(make_cpuid_c(filearchs)); +outfile_cpu_c.close(); -outfile_set_simd.write(make_set_simd(filearchs)); +outfile_set_simd.write(make_set_simd(filearchs, machines)); outfile_set_simd.close(); 
-outfile_cpu_generic_c.write(make_cpuid_generic_c(filearchs)); -outfile_cpu_generic_c.close(); - -outfile_cpu_powerpc_c.write(make_cpuid_powerpc_c(filearchs)); -outfile_cpu_powerpc_c.close(); - -outfile_config_in.write(make_config_in(filearchs)); -outfile_config_in.close(); - -outfile_reg.write(make_registry(filearchs, functions, fcountlist)); -outfile_reg.close(); - -outfile_h.write(make_h(functions, arched_arglist, retlist)); -outfile_h.close(); - -outfile_init_h.write(make_init_h(functions, arched_arglist, retlist)); -outfile_init_h.close(); - outfile_config_fixed.write(make_config_fixed(filearchs)); outfile_config_fixed.close(); -outfile_c.write( make_c(functions, taglist, arched_arglist, retlist, my_arglist, fcountlist)); -outfile_c.close(); +outfile_typedefs.write(make_typedefs(functions, retlist, my_argtypelist)); +outfile_typedefs.close(); -outfile_runtime_c.write(make_runtime_c(functions, taglist, arched_arglist, retlist, my_arglist, fcountlist)); -outfile_runtime_c.close(); +outfile_makefile_am.write(make_makefile_am(filearchs, machines, archflags_dict)) +outfile_makefile_am.close() -outfile_init_c.write(make_init_c(functions, filearchs)); -outfile_init_c.close(); +outfile_machines_h.write(make_machines_h(functions, machines, archs)) +outfile_machines_h.close() -outfile_runtime.write(make_runtime(functions)); -outfile_runtime.close(); +outfile_machines_c.write(make_machines_c(machines)) +outfile_machines_c.close() -outfile_typedefs.write(make_typedefs(functions, retlist, my_argtypelist)); -outfile_typedefs.close(); +outfile_c.write(make_c(machines, archs, functions, arched_arglist, my_arglist)) +outfile_c.close() -outfile_environment_c.write(make_environment_init_c(filearchs)); -outfile_environment_c.close(); +outfile_h.write(make_h(functions, arched_arglist)) +outfile_h.close() -outfile_environment_h.write(make_environment_init_h()); -outfile_environment_h.close(); +for machine in machines: + machine_c_filename = os.path.join(gendir, "lib/volk_machine_" 
+ machine + ".c") + outfile_machine_c = open(machine_c_filename, "w") + outfile_machine_c.write(make_each_machine_c(machine, machines[machine], functions, fcountlist, taglist)) + outfile_machine_c.close() diff --git a/volk/include/volk/.gitignore b/volk/include/volk/.gitignore index be8358f3a..b336cc7ce 100644 --- a/volk/include/volk/.gitignore +++ b/volk/include/volk/.gitignore @@ -1,20 +1,2 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in -/volk.h -/volk_config.h -/volk_config_fixed.h -/volk_cpu.h -/volk_environment_init.h -/volk_registry.h -/volk_runtime.h -/volk_tables.h -/volk_typedefs.h -/volk_mktables diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am index 7a5edd624..5f9e134bc 100644 --- a/volk/include/volk/Makefile.am +++ b/volk/include/volk/Makefile.am @@ -20,27 +20,19 @@ include $(top_srcdir)/Makefile.common -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \ +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ $(LV_CXXFLAGS) $(WITH_INCLUDES) volkincludedir = $(prefix)/include/volk -BUILT_SOURCES: \ - volk_config.h \ - volk_tables.h - volkinclude_HEADERS = \ volk_complex.h \ volk_common.h \ - volk_config_fixed.h \ - volk_runtime.h \ - volk_config.h \ - volk_tables.h \ - volk_typedefs.h \ - volk_registry.h \ - volk.h \ - volk_cpu.h \ - volk_environment_init.h \ + volk_prefs.h \ + $(top_gendir)/include/volk/volk_config_fixed.h \ + $(top_gendir)/include/volk/volk_typedefs.h \ + $(top_gendir)/include/volk/volk.h \ + $(top_gendir)/include/volk/volk_cpu.h \ volk_16i_x5_add_quad_16i_x4_a16.h \ volk_16i_branch_4_state_8_a16.h \ volk_16ic_deinterleave_16i_x2_a16.h \ @@ -129,33 +121,3 @@ volkinclude_HEADERS = \ volk_8i_convert_16i_u.h \ volk_8i_s32f_convert_32f_a16.h \ volk_8i_s32f_convert_32f_u.h - -VOLK_MKTABLES_SOURCES = \ - $(platform_CODE) \ - $(top_srcdir)/lib/volk_rank_archs.c \ - $(top_srcdir)/lib/volk_mktables.c - - -volk_mktables$(EXEEXT): $(VOLK_MKTABLES_SOURCES) - $(CC) -o $@ $^ 
$(AM_CPPFLAGS) -I$(top_builddir)/include - -volk_tables.h: volk_mktables$(EXEEXT) - ./volk_mktables$(EXEEXT) - -volk_config.h: $(top_builddir)/volk_config.h - cp $^ $(top_builddir)/include/volk/$@ - -distclean-local: - rm -f volk_config_fixed.h - rm -f volk_config.h - rm -f volk_cpu.h - rm -f volk.h - rm -f volk_registry.h - rm -f volk_runtime.h - rm -f volk_typedefs.h - rm -f volk_tables.h - rm -f *.pyc - rm -f Makefile.in - rm -f volk_environment_init.h - rm -f volk_mktables - rm -f $(BUILT_SOURCES) diff --git a/volk/include/volk/emit_omnilog.py b/volk/include/volk/emit_omnilog.py deleted file mode 100644 index 309d7e578..000000000 --- a/volk/include/volk/emit_omnilog.py +++ /dev/null @@ -1,13 +0,0 @@ -def emit_prolog(): - tempstring = ""; - tempstring = tempstring + '#ifdef __cplusplus\n'; - tempstring = tempstring + 'extern "C" {\n'; - tempstring = tempstring + '#endif\n'; - return tempstring; -def emit_epilog(): - tempstring = ""; - tempstring = tempstring + '#ifdef __cplusplus\n'; - tempstring = tempstring + '}\n'; - tempstring = tempstring + '#endif\n'; - return tempstring; - diff --git a/volk/include/volk/make_c.py b/volk/include/volk/make_c.py deleted file mode 100644 index 6e75067d0..000000000 --- a/volk/include/volk/make_c.py +++ /dev/null @@ -1,73 +0,0 @@ -from xml.dom import minidom -import string -from volk_regexp import * - - -def make_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n\n#include<volk/volk.h>\n'; - tempstring = tempstring + '#include<volk/volk_tables.h>\n'; - tempstring = tempstring + '#include<volk/volk_typedefs.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - tempstring = tempstring + '#include<string.h>\n'; - for func in funclist: - tempstring = tempstring + "#include<volk/" + func + ".h>\n" ; - tempstring = tempstring + '\n'; - - tempstring = tempstring + 
"static inline unsigned int volk_get_index(const char** indices, const char* arch, const int* arch_defs) {\n"; - tempstring = tempstring + " int i = 1;\n" - tempstring = tempstring + " for(;i<arch_defs[0];++i){\n" - tempstring = tempstring + " if (strcmp(arch, indices[i]) == 0) {\n" - tempstring = tempstring + " return i;\n" - tempstring = tempstring + " }\n" - tempstring = tempstring + " }\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n" - - for i in range(len(funclist)): - tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n " + funclist[i] + "_" + str(taglist[i][tags_counter]) + ",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + "static const char* " + funclist[i] + "_indices[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n \"" + str(taglist[i][tags_counter]) + "\",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + retlist[i] + "inline " + funclist[i] + "_manual" + arched_arglist[i] + '\n'; - tempstring = tempstring + "return " + funclist[i] 
+ "_archs[volk_get_index(" + funclist[i] + "_indices, arch, " + funclist[i] + "_arch_defs)](" + my_arglist[i] + ");" + "\n}\n"; - - tempstring = tempstring + retlist[i] + "inline " + funclist[i] + replace_arch.sub("", arched_arglist[i]) + '\n'; - - tempstring = tempstring + funclist[i] + "_archs[" + funclist[i] + "_func_table](" + my_arglist[i] + ");" + '\n'; - tempstring = tempstring + "}\n\n"; - - return tempstring; diff --git a/volk/include/volk/make_cpuid_generic_c.py b/volk/include/volk/make_cpuid_generic_c.py deleted file mode 100644 index c682d4138..000000000 --- a/volk/include/volk/make_cpuid_generic_c.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. 
-# - -from xml.dom import minidom - -def make_cpuid_generic_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - for domarch in dom: - if str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; diff --git a/volk/include/volk/make_cpuid_powerpc_c.py b/volk/include/volk/make_cpuid_powerpc_c.py deleted file mode 100644 index 0b0ea84e7..000000000 --- a/volk/include/volk/make_cpuid_powerpc_c.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. 
-# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. -# - -from xml.dom import minidom - -def make_cpuid_powerpc_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - #just assume it has them for powerpc - for domarch in dom: - if str(domarch.attributes["type"].value) == "powerpc": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - elif str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - 
tempstring = tempstring + " volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - - tempstring = tempstring + "}\n\n" - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; - diff --git a/volk/include/volk/make_cpuid_x86_c.py b/volk/include/volk/make_cpuid_x86_c.py deleted file mode 100644 index 2b2bd7c91..000000000 --- a/volk/include/volk/make_cpuid_x86_c.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GNU Radio; see the file COPYING. If not, write to -# the Free Software Foundation, Inc., 51 Franklin Street, -# Boston, MA 02110-1301, USA. 
-# - -from xml.dom import minidom - -def make_cpuid_x86_c(dom) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring + "#include <volk/volk_cpu.h>\n" - tempstring = tempstring + "#include <volk/volk_config_fixed.h>\n\n" - tempstring = tempstring + "#include <gcc_x86_cpuid.h>\n\n" - tempstring = tempstring + "struct VOLK_CPU volk_cpu;\n\n" - - tempstring = tempstring + "#define cpuid_x86(op, r) __get_cpuid(op, r+0, r+1, r+2, r+3)\n\n" - tempstring = tempstring + "static inline unsigned int cpuid_eax(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[0];\n" - tempstring = tempstring + "}\n\n"; - - tempstring = tempstring + "static inline unsigned int cpuid_ebx(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[1];\n" - tempstring = tempstring + "}\n\n"; - - tempstring = tempstring + "static inline unsigned int cpuid_ecx(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[2];\n" - tempstring = tempstring + "}\n\n"; - - tempstring = tempstring + "static inline unsigned int cpuid_edx(unsigned int op) {\n"; - tempstring = tempstring + " unsigned int regs[4];\n" - tempstring = tempstring + " cpuid_x86 (op, regs);\n" - tempstring = tempstring + " return regs[3];\n" - tempstring = tempstring + "}\n\n"; - - for domarch in dom: - if str(domarch.attributes["type"].value) == "x86": - if "no_test" in domarch.attributes.keys(): - no_test = str(domarch.attributes["no_test"].value); - if no_test == "true": - no_test = True; - else: - no_test = False; - else: - no_test = False; - arch = str(domarch.attributes["name"].value); - op = 
domarch.getElementsByTagName("op"); - if op: - op = str(op[0].firstChild.data); - reg = domarch.getElementsByTagName("reg"); - if reg: - reg = str(reg[0].firstChild.data); - shift = domarch.getElementsByTagName("shift"); - if shift: - shift = str(shift[0].firstChild.data); - val = domarch.getElementsByTagName("val"); - if val: - val = str(val[0].firstChild.data); - - if no_test: - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - elif op == "1": - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " unsigned int e" + reg + "x = cpuid_e" + reg + "x (" + op + ");\n" - tempstring = tempstring + " return ((e" + reg + "x >> " + shift + ") & 1) == " + val + ";\n" - tempstring = tempstring + "}\n\n"; - - elif op == "0x80000001": - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " unsigned int extended_fct_count = cpuid_eax(0x80000000);\n"; - tempstring = tempstring + " if (extended_fct_count < 0x80000001)\n"; - tempstring = tempstring + " return "+ val + "^1;\n\n" - tempstring = tempstring + " unsigned int extended_features = cpuid_e" + reg + "x (" + op + ");\n"; - tempstring = tempstring + " return ((extended_features >> " + shift + ") & 1) == " + val + ";\n" - tempstring = tempstring + "}\n\n"; - elif str(domarch.attributes["type"].value) == "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 1;\n" - tempstring = tempstring + "}\n\n" - else: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + "int i_can_has_" + arch + " () {\n" - tempstring = tempstring + " return 0;\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "void volk_cpu_init() {\n"; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " 
volk_cpu.has_" + arch + " = &i_can_has_" + arch + ";\n" - tempstring = tempstring + "}\n\n" - - tempstring = tempstring + "unsigned int volk_get_lvarch() {\n"; - tempstring = tempstring + " unsigned int retval = 0;\n" - tempstring = tempstring + " volk_cpu_init();\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " retval += volk_cpu.has_" + arch + "() << LV_" + arch.swapcase() + ";\n" - tempstring = tempstring + " return retval;\n" - tempstring = tempstring + "}\n\n" - - return tempstring; - - - - - - - diff --git a/volk/include/volk/make_h.py b/volk/include/volk/make_h.py deleted file mode 100644 index 81d9ad401..000000000 --- a/volk/include/volk/make_h.py +++ /dev/null @@ -1,28 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -from volk_regexp import * - - - -def make_h(funclist, arched_arglist, retlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_H'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_H'; - tempstring = tempstring + '\n\n#include<inttypes.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - tempstring = tempstring + '#include<volk/volk_config.h>\n'; - tempstring = tempstring + '#include<volk/volk_config_fixed.h>\n'; - tempstring = tempstring + '#include<volk/volk_environment_init.h>\n' - tempstring = tempstring + emit_prolog() - tempstring = tempstring + '\n'; - - for i in range(len(retlist)): - tempstring = tempstring + retlist[i] + funclist[i] + replace_bracket.sub(";", replace_arch.sub("", arched_arglist[i])) + '\n'; - tempstring = tempstring + retlist[i] + funclist[i] + "_manual" + replace_bracket.sub(";", arched_arglist[i]) + '\n'; - - tempstring = tempstring + emit_epilog(); - - tempstring = tempstring + "#endif /*INCLUDED_VOLK_H*/\n"; - - return tempstring; diff --git a/volk/include/volk/make_init_c.py b/volk/include/volk/make_init_c.py 
deleted file mode 100644 index 330e19592..000000000 --- a/volk/include/volk/make_init_c.py +++ /dev/null @@ -1,42 +0,0 @@ -from xml.dom import minidom - -def make_init_c(funclist, dom) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - - tempstring = tempstring + '\n\n#include<volk/volk_runtime.h>\n'; - tempstring = tempstring + '#include<volk/volk_cpu.h>\n'; - tempstring = tempstring + '#include<volk_init.h>\n'; - for domarch in dom: - arch = str(domarch.attributes["name"].value); - incs = domarch.getElementsByTagName("include"); - for inc in incs: - my_inc = str(inc.firstChild.data); - tempstring = tempstring + "#if LV_HAVE_" + arch.swapcase() + "\n"; - tempstring = tempstring + "#include<" + my_inc + ">\n"; - tempstring = tempstring + "#endif\n" - tempstring = tempstring + '\n\n'; - - tempstring = tempstring + "extern struct VOLK_RUNTIME volk_runtime;\n\n"; - tempstring = tempstring + "struct VOLK_RUNTIME* get_volk_runtime(){\n"; - tempstring = tempstring + " return &volk_runtime;\n"; - tempstring = tempstring + "}\n\n" - tempstring = tempstring + " void volk_runtime_init() {\nvolk_cpu_init();\n"; - - for func in funclist: - tempstring = tempstring + " volk_runtime." 
+ func + " = default_acquire_" + func + ";\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - envs = domarch.getElementsByTagName("environment"); - for env in envs: - cmd = str(env.firstChild.data); - tempstring = tempstring + " if(volk_cpu.has_" + arch + "()){\n"; - tempstring = tempstring + "#if LV_HAVE_" + arch.swapcase() + "\n"; - tempstring = tempstring + " " + cmd + "\n"; - tempstring = tempstring + "#endif\n" - tempstring = tempstring + " }\n"; - - tempstring = tempstring + "}\n"; - - return tempstring diff --git a/volk/include/volk/make_init_h.py b/volk/include/volk/make_init_h.py deleted file mode 100644 index 6dbe1c585..000000000 --- a/volk/include/volk/make_init_h.py +++ /dev/null @@ -1,26 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -from volk_regexp import * - - - -def make_init_h(funclist, arched_arglist, retlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_INIT_H'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_INIT_H'; - tempstring = tempstring + '\n\n#include<inttypes.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - - tempstring = tempstring + '\n'; - - tempstring = tempstring + emit_prolog(); - - for i in range(len(retlist)): - tempstring = tempstring + retlist[i] + " default_acquire_" + funclist[i] + replace_bracket.sub(";", replace_arch.sub("", arched_arglist[i])) + '\n'; - - tempstring= tempstring + emit_epilog(); - tempstring = tempstring + "#endif /*INCLUDED_VOLK_INIT_H*/\n"; - - return tempstring; diff --git a/volk/include/volk/make_mktables.py b/volk/include/volk/make_mktables.py deleted file mode 100644 index 051ac268d..000000000 --- a/volk/include/volk/make_mktables.py +++ /dev/null @@ -1,33 +0,0 @@ - - -def make_mktables(funclist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/\n'; - - tempstring 
= tempstring + '#include<stdio.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - tempstring = tempstring + '#include<volk_rank_archs.h>\n'; - tempstrgin = tempstring + '#include<volk/volk_cpu.h>\n'; - tempstring = tempstring + "\n\n"; - - tempstring = tempstring + 'int main() {\n'; - tempstring = tempstring + ' int i = 0;\n'; - tempstring = tempstring + ' FILE* output;\n'; - tempstring = tempstring + ' output = fopen("volk_tables.h", "w");\n'; - tempstring = tempstring + ' fprintf(output, "#ifndef INCLUDED_VOLK_TABLES_H\\n");\n'; - tempstring = tempstring + ' fprintf(output, "#define INCLUDED_VOLK_TABLES_H\\n\\n");\n'; - - for func in funclist: - tempstring = tempstring + ' fprintf(output, "static const ' + func + '_func_table = %u;\\n", volk_rank_archs(' + func + '_arch_defs, volk_get_lvarch()));\n'; - tempstring = tempstring + ' fprintf(output, "#endif /*INCLUDED_VOLK_TABLES_H*/\\n");\n'; - tempstring = tempstring + ' fclose(output);\n' - tempstring = tempstring + '}\n'; - return tempstring; - - - - - - - - diff --git a/volk/include/volk/make_registry.py b/volk/include/volk/make_registry.py deleted file mode 100644 index 8457d61f3..000000000 --- a/volk/include/volk/make_registry.py +++ /dev/null @@ -1,62 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -import string - -def make_registry(dom, funclist, fcountlist) : - tempstring = ""; - tempstring = tempstring + "/*this file is auto_generated by volk_register.py*/\n\n"; - tempstring = tempstring +'\n#ifndef INCLUDED_VOLK_REGISTRY_H'; - tempstring = tempstring +'\n#define INCLUDED_VOLK_REGISTRY_H\n\n'; - tempstring = tempstring +'#include<volk/volk_config.h>\n'; - tempstring = tempstring +'#include<volk/volk_config_fixed.h>\n'; - tempstring = tempstring + emit_prolog(); - tempstring = tempstring + '\n' - - - - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring +"#if LV_HAVE_" + arch.swapcase() + "\n"; - tempstring = tempstring 
+"#define LV_" + arch.swapcase() + "_CNT 1\n"; - tempstring = tempstring +"#else\n"; - tempstring = tempstring +"#define LV_" + arch.swapcase() + "_CNT 0\n"; - tempstring = tempstring +"#endif /*LV_HAVE_" + arch.swapcase() + "*/\n\n"; - - counter = 0; - for fcount in fcountlist: - tempstring = tempstring + "static const int " + funclist[counter] + "_arch_defs[] = {\n"; - counter = counter + 1; - for arch_list in fcount: - tempstring = tempstring + " (LV_" - for ind in range(len(arch_list)): - tempstring = tempstring + arch_list[ind] + "_CNT"; - if ind < len(arch_list) - 1: - tempstring = tempstring + " * LV_"; - tempstring = tempstring + ") + "; - lindex = tempstring.rfind(" + "); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], " + ", ""); - tempstring = tempstring + ",\n" - for arch_list in fcount: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - tempstring = tempstring + "\n" - tempstring = tempstring + " (1 << LV_" - for ind in range(len(arch_list)): - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + ") + (1 << LV_" - tempstring = tempstring + "),\n#endif\n" - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n" - - - tempstring = tempstring + emit_epilog(); - tempstring = tempstring +"#endif /*INCLUDED_VOLK_REGISTRY_H*/\n"; - - return tempstring; - diff --git a/volk/include/volk/make_runtime.py b/volk/include/volk/make_runtime.py deleted file mode 100644 index 645b3aaee..000000000 --- a/volk/include/volk/make_runtime.py +++ /dev/null @@ -1,34 +0,0 @@ -from xml.dom import minidom -from emit_omnilog import * -from volk_regexp import * - - - -def make_runtime(funclist) : - tempstring = ""; - tempstring = 
tempstring + '/*this file is auto generated by volk_register.py*/\n'; - - tempstring = tempstring + '\n#ifndef INCLUDED_VOLK_RUNTIME'; - tempstring = tempstring + '\n#define INCLUDED_VOLK_RUNTIME'; - tempstring = tempstring + '\n\n#include<volk/volk_typedefs.h>\n'; - tempstring = tempstring + '#include<volk/volk_config.h>\n'; - tempstring = tempstring + '#include<volk/volk_config_fixed.h>\n'; - tempstring = tempstring + '#include<volk/volk_complex.h>\n'; - tempstring = tempstring + emit_prolog(); - - tempstring = tempstring + '\n'; - - tempstring = tempstring + "struct VOLK_RUNTIME {\n"; - - for i in range(len(funclist)): - tempstring = tempstring + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + ";\n"; - tempstring = tempstring + "};\n\n"; - - tempstring = tempstring + "struct VOLK_RUNTIME* get_volk_runtime();\n\n" - tempstring = tempstring + "\nvoid volk_runtime_init();\n"; - - tempstring = tempstring + emit_epilog(); - tempstring = tempstring + "#endif /*INCLUDED_VOLK_RUNTIME*/\n"; - - return tempstring; - diff --git a/volk/include/volk/make_runtime_c.py b/volk/include/volk/make_runtime_c.py deleted file mode 100644 index 070df9ba7..000000000 --- a/volk/include/volk/make_runtime_c.py +++ /dev/null @@ -1,47 +0,0 @@ -from xml.dom import minidom -import string -from volk_regexp import * - - -def make_runtime_c(funclist, taglist, arched_arglist, retlist, my_arglist, fcountlist) : - tempstring = ""; - tempstring = tempstring + '/*this file is auto generated by volk_register.py*/'; - - - tempstring = tempstring + '\n\n#include<volk/volk_runtime.h>\n'; - tempstring = tempstring + '#include<volk/volk_config.h>\n'; - tempstring = tempstring + "#include<volk/volk_config_fixed.h>\n"; - tempstring = tempstring + '#include<volk/volk_cpu.h>\n'; - tempstring = tempstring + '#include<volk_init.h>\n'; - tempstring = tempstring + '#include<volk/volk_registry.h>\n'; - - for func in funclist: - tempstring = tempstring + "#include<volk/" + func + ".h>\n" ; - tempstring = 
tempstring + '\n'; - - tempstring = tempstring + "struct VOLK_RUNTIME volk_runtime;\n"; - - for i in range(len(funclist)): - tempstring = tempstring + "static const " + replace_volk.sub("p", funclist[i]) + " " + funclist[i] + "_archs[] = {\n"; - - tags_counter = 0; - for arch_list in fcountlist[i]: - tempstring = tempstring + "#if LV_HAVE_" - for ind in range(len(arch_list)): - - tempstring = tempstring + arch_list[ind]; - if ind < len(arch_list) - 1: - tempstring = tempstring + " && LV_HAVE_"; - - tempstring = tempstring + "\n " + funclist[i] + "_" + str(taglist[i][tags_counter]) + ",\n#endif\n"; - tags_counter = tags_counter + 1; - - lindex = tempstring.rfind(","); - tempstring = tempstring[0:lindex] + string.replace(tempstring[lindex:len(tempstring)], ",", ""); - tempstring = tempstring + "};\n\n"; - - - tempstring = tempstring + retlist[i] + "default_acquire_" + funclist[i] + replace_arch.sub("", arched_arglist[i]) + '\n'; - tempstring = tempstring + "volk_runtime." + funclist[i] + " = " + funclist[i] + "_archs[volk_rank_archs(" + funclist[i] + "_arch_defs, volk_get_lvarch())];\n" + "return " + funclist[i] + "_archs[volk_rank_archs(" + funclist[i] + "_arch_defs, volk_get_lvarch())](" + my_arglist[i] + ");" + '\n}\n'; - - return tempstring; diff --git a/volk/include/volk/make_set_simd.py b/volk/include/volk/make_set_simd.py deleted file mode 100644 index c74b0464d..000000000 --- a/volk/include/volk/make_set_simd.py +++ /dev/null @@ -1,272 +0,0 @@ -# -# Copyright 2010 Free Software Foundation, Inc. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# - -from xml.dom import minidom - -def make_set_simd(dom) : - tempstring = ""; - tempstring = tempstring +'dnl this file is auto generated by volk_register.py\n\n'; - - tempstring = tempstring + "AC_DEFUN([_MAKE_FAKE_PROCCPU],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; - tempstring = tempstring + " AC_MSG_CHECKING([proccpu])\n"; - tempstring = tempstring + " case \"$MD_CPU\" in\n"; - tempstring = tempstring + " (x86)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ -I$srcdir/lib $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_x86.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " (powerpc)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ $srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_powerpc.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " (*)\n"; - tempstring = tempstring + " if test -z \"`${CC} -o proccpu -I$srcdir/include/ 
$srcdir/lib/volk_proccpu_sim.c $srcdir/lib/volk_cpu_generic.c 2>&1`\"\n"; - tempstring = tempstring + " then\n"; - tempstring = tempstring + " AC_MSG_RESULT(yes)\n"; - tempstring = tempstring + " lv_PROCCPU=\"`./proccpu`\"\n"; - tempstring = tempstring + " rm -f proccpu\n"; - tempstring = tempstring + " else\n"; - tempstring = tempstring + " AC_MSG_RESULT(no)\n"; - tempstring = tempstring + " lv_PROCCPU=no\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " ;;\n"; - tempstring = tempstring + " esac\n"; - tempstring = tempstring + "])\n" - - for domarch in dom: - if str(domarch.attributes["type"].value) != "all": - arch = str(domarch.attributes["name"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - tempstring = tempstring + "AC_DEFUN([_TRY_ADD_" + arch.swapcase() + "],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " LF_CHECK_CC_FLAG([-" + flag + "])\n"; - tempstring = tempstring + " LF_CHECK_CXX_FLAG([-" + flag + "])\n"; - tempstring = tempstring + "])\n"; - - tempstring = tempstring + "AC_DEFUN([LV_SET_SIMD_FLAGS],\n"; - tempstring = tempstring + "[\n"; - tempstring = tempstring + " AC_REQUIRE([GR_SET_MD_CPU])\n"; - tempstring = tempstring + " AC_SUBST(LV_CXXFLAGS)\n"; - tempstring = tempstring + " indCC=no\n"; - tempstring = tempstring + " indCXX=no\n"; - tempstring = tempstring + " indLV_ARCH=no\n"; - tempstring = tempstring + " AC_ARG_WITH(lv_arch,\n"; - tempstring = tempstring + " AC_HELP_STRING([--with-lv_arch=ARCH],[set volk hardware speedups as space separated string with elements from the following list("; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + arch + ", " - tempstring = tempstring[0:len(tempstring) - 2]; - - tempstring = tempstring + ")]),\n"; - tempstring = tempstring + " [cf_with_lv_arch=\"$withval\"],\n"; - tempstring = tempstring + " [cf_with_lv_arch=\"\"])\n"; - if str(domarch.attributes["type"].value) == 
"all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [always set "+ arch + "!])\n"; - tempstring = tempstring + " ADDONS=\"\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"\"\n"; - tempstring = tempstring + " _MAKE_FAKE_PROCCPU\n"; - tempstring = tempstring + " OVERRULE_FLAG=\"no\"\n"; - tempstring = tempstring + " if test -z \"$cf_with_lv_arch\"; then\n"; - tempstring = tempstring + " cf_with_lv_arch=$lv_PROCCPU\n"; - tempstring = tempstring + " OVERRULE_FLAG=\"yes\"\n"; - - tempstring = tempstring + " fi\n"; - for domarch in dom: - if str(domarch.attributes["type"].value) != "all": - arch = str(domarch.attributes["name"].value); - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=no\n"; - - tempstring = tempstring + " case \"$MD_CPU\" in\n"; - tempstring = tempstring + " (x86)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - if atype == "x86": - tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - overrule = domarch.getElementsByTagName("overrule"); - if overrule: - overrule = str(overrule[0].firstChild.data); - else: - overrule = ""; - overrule_val = domarch.getElementsByTagName("overrule_val"); - if overrule_val: - overrule_val = str(overrule_val[0].firstChild.data); - else: - overrule_val = ""; - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "x86": - tempstring = tempstring + " for i in $lf_CXXFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCXX=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $lf_CFLAGS\n" - tempstring = tempstring + " do\n" - 
tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCC=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - - tempstring = tempstring + " if test \"$indCC\" == \"yes\" && test \"$indCXX\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indCC=no\n" - tempstring = tempstring + " indCXX=no\n" - tempstring = tempstring + " indLV_ARCH=no\n" - elif atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - 
tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - - tempstring = tempstring + " ;;\n" - - tempstring = tempstring + " (powerpc)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - if atype == "powerpc": - tempstring = tempstring + " _TRY_ADD_" + arch.swapcase() + "\n"; - - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "powerpc": - tempstring = tempstring + " for i in $lf_CXXFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCXX=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $lf_CFLAGS\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X-" + flag +"; then\n"; - tempstring = tempstring + " indCC=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test \"$indCC\" = yes && test \"indCXX\" = yes && \"indLV_ARCH\" = yes; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag 
set])\n"; - tempstring = tempstring + " ADDONS=\"${ADDONS} -" + flag + "\"\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indCC=no\n" - tempstring = tempstring + " indCXX=no\n" - tempstring = tempstring + " indLV_ARCH=no\n" - elif atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " ;;\n" - tempstring = tempstring + " (*)\n" - for domarch in dom: - arch = str(domarch.attributes["name"].value); - atype = str(domarch.attributes["type"].value); - flag = domarch.getElementsByTagName("flag"); - flag = str(flag[0].firstChild.data); - if atype == "all": - tempstring = tempstring + " for i in $cf_with_lv_arch\n" - tempstring = tempstring + " do\n" - tempstring = tempstring + " if test \"X$i\" = X" + arch + "; then\n"; - tempstring = tempstring + " indLV_ARCH=yes\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " done\n" - 
tempstring = tempstring + " if test -n \"" + overrule + "\" && test \"$" + overrule + "\" == \"" + overrule_val + "\" && test \"$OVERRULE_FLAG\" == \"yes\" && test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " fi\n" - tempstring = tempstring + " if test \"$indLV_ARCH\" == \"yes\"; then\n" - tempstring = tempstring + " AC_DEFINE(LV_HAVE_" + arch.swapcase() + ", 1, [" + arch + " flag set])\n"; - tempstring = tempstring + " LV_HAVE_" + arch.swapcase() + "=yes\n"; - tempstring = tempstring + " BUILT_ARCHS=\"${BUILT_ARCHS} " + arch + "\"\n"; - tempstring = tempstring + " fi\n" - tempstring = tempstring + " indLV_ARCH=no\n" - tempstring = tempstring + " ;;\n" - tempstring = tempstring + " esac\n" - tempstring = tempstring + " LV_CXXFLAGS=\"${LV_CXXFLAGS} ${ADDONS}\"\n" - tempstring = tempstring + "])\n" - - return tempstring; - - diff --git a/volk/include/volk/volk_16i_branch_4_state_8_a16.h b/volk/include/volk/volk_16i_branch_4_state_8_a16.h index 3437c1a6b..5eb03b346 100644 --- a/volk/include/volk/volk_16i_branch_4_state_8_a16.h +++ b/volk/include/volk/volk_16i_branch_4_state_8_a16.h @@ -8,7 +8,7 @@ -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> @@ -137,7 +137,7 @@ static inline void volk_16i_branch_4_state_8_a16_ssse3(short* target, short* s #endif /*LV_HAVE_SSEs*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_16i_branch_4_state_8_a16_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) { int i = 0; diff --git a/volk/include/volk/volk_16i_convert_8i_a16.h b/volk/include/volk/volk_16i_convert_8i_a16.h index 73e45ad63..4d51e5903 100644 --- a/volk/include/volk/volk_16i_convert_8i_a16.h +++ b/volk/include/volk/volk_16i_convert_8i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the input 16 bit integer data into 8 bit integer data diff --git a/volk/include/volk/volk_16i_convert_8i_u.h b/volk/include/volk/volk_16i_convert_8i_u.h index 5fc792b56..df1084fe0 100644 --- a/volk/include/volk/volk_16i_convert_8i_u.h +++ b/volk/include/volk/volk_16i_convert_8i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the input 16 bit integer data into 8 bit integer data diff --git a/volk/include/volk/volk_16i_max_star_16i_a16.h b/volk/include/volk/volk_16i_max_star_16i_a16.h index ff57bd2a1..063444279 100644 --- a/volk/include/volk/volk_16i_max_star_16i_a16.h +++ b/volk/include/volk/volk_16i_max_star_16i_a16.h @@ -6,7 +6,7 @@ #include<stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> @@ -85,7 +85,7 @@ static inline void volk_16i_max_star_16i_a16_ssse3(short* target, short* src0, #endif /*LV_HAVE_SSSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_16i_max_star_16i_a16_generic(short* target, short* src0, unsigned int num_bytes) { diff --git a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h index 695e08dbf..ece6adb40 100644 --- a/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h +++ b/volk/include/volk/volk_16i_max_star_horizontal_16i_a16.h @@ -6,7 +6,7 @@ #include<stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include<xmmintrin.h> #include<emmintrin.h> @@ -109,7 +109,7 @@ static inline void volk_16i_max_star_horizontal_16i_a16_ssse3(int16_t* target, #endif /*LV_HAVE_SSSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_16i_max_star_horizontal_16i_a16_generic(int16_t* target, int16_t* src0, unsigned int num_bytes) { int i = 0; diff --git a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h index e52a949fb..ae1a18157 
100644 --- a/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h +++ b/volk/include/volk/volk_16i_permute_and_scalar_add_a16.h @@ -8,7 +8,7 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<xmmintrin.h> #include<emmintrin.h> @@ -116,7 +116,7 @@ static inline void volk_16i_permute_and_scalar_add_a16_sse2(short* target, sho #endif /*LV_HAVE_SSEs*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_16i_permute_and_scalar_add_a16_generic(short* target, short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, short* cntl3, short* scalars, unsigned int num_bytes) { int i = 0; diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h index 83fd26ff9..09bc252f0 100644 --- a/volk/include/volk/volk_16i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -58,7 +58,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse4_1(float* outputVector, con } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! @@ -94,7 +94,7 @@ static inline void volk_16i_s32f_convert_32f_a16_sse(float* outputVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 16 bit input data buffer diff --git a/volk/include/volk/volk_16i_s32f_convert_32f_u.h b/volk/include/volk/volk_16i_s32f_convert_32f_u.h index 8f0dd0083..d34acc091 100644 --- a/volk/include/volk/volk_16i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_16i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
@@ -59,7 +59,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! @@ -96,7 +96,7 @@ static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const in } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 16 bit input data buffer diff --git a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h index e4ec5ab4e..94e5eb986 100644 --- a/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h +++ b/volk/include/volk/volk_16i_x4_quad_max_star_16i_a16.h @@ -9,7 +9,7 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<emmintrin.h> @@ -167,7 +167,7 @@ static inline void volk_16i_x4_quad_max_star_16i_a16_sse2(short* target, short* #endif /*LV_HAVE_SSE2*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_16i_x4_quad_max_star_16i_a16_generic(short* target, short* src0, short* src1, short* src2, short* src3, unsigned int num_bytes) { int i = 0; diff --git a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h index 5744ca3a6..c157bf64a 100644 --- a/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h +++ b/volk/include/volk/volk_16i_x5_add_quad_16i_x4_a16.h @@ -9,7 +9,7 @@ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include<xmmintrin.h> #include<emmintrin.h> @@ -111,7 +111,7 @@ static inline void volk_16i_x5_add_quad_16i_x4_a16_sse2(short* target0, short* #endif /*LV_HAVE_SSE2*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_16i_x5_add_quad_16i_x4_a16_generic(short* target0, short* target1, short* target2, short* target3, short* src0, short* src1, short* src2, short* src3, short* src4, unsigned int num_bytes) { 
diff --git a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h index 7e08bf182..227a92303 100644 --- a/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_16i_x2_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data @@ -52,7 +52,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_ssse3(int16_t* iBuffer, int } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data @@ -120,7 +120,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_sse2(int16_t* iBuffer, int1 } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data \param complexVector The complex input vector @@ -140,7 +140,7 @@ static inline void volk_16ic_deinterleave_16i_x2_a16_generic(int16_t* iBuffer, i } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Deinterleaves the complex 16 bit vector into I & Q vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h index 388c00592..35d0e8be2 100644 --- a/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_16i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I vector data @@ -47,7 +47,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_ssse3(int16_t* iBuffer, c #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Deinterleaves the complex 16 bit vector into I vector data @@ -96,7 +96,7 @@ static inline void volk_16ic_deinterleave_real_16i_a16_sse2(int16_t* iBuffer, co } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into I vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h index 55a25702e..bdf5fc162 100644 --- a/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h +++ b/volk/include/volk/volk_16ic_deinterleave_real_8i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data @@ -59,7 +59,7 @@ static inline void volk_16ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, con } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data \param complexVector The complex input vector @@ -77,7 +77,7 @@ static inline void volk_16ic_deinterleave_real_8i_a16_generic(int8_t* iBuffer, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_16ic_magnitude_16i_a16.h b/volk/include/volk/volk_16ic_magnitude_16i_a16.h index bdcace750..73c6f3390 100644 --- a/volk/include/volk/volk_16ic_magnitude_16i_a16.h +++ b/volk/include/volk/volk_16ic_magnitude_16i_a16.h @@ -1,11 +1,12 @@ #ifndef INCLUDED_volk_16ic_magnitude_16i_a16_H #define INCLUDED_volk_16ic_magnitude_16i_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -25,8 +26,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -76,7 +77,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse3(int16_t* magnitudeVector, co } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -96,8 +97,8 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float inputFloatBuffer[4] __attribute__((aligned(128))); - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ @@ -153,7 +154,7 @@ static inline void volk_16ic_magnitude_16i_a16_sse(int16_t* magnitudeVector, con } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -173,7 +174,7 @@ static inline void volk_16ic_magnitude_16i_a16_generic(int16_t* magnitudeVector, } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC_DISABLED +#ifdef LV_HAVE_ORC_DISABLED /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h index 606de2fc5..e4a9015b4 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_32f_x2_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl __m128 invScalar = _mm_set_ps1(1.0/scalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ @@ -68,7 +69,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, fl } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data \param complexVector The complex input vector @@ -89,7 +90,7 @@ static inline void volk_16ic_s32f_deinterleave_32f_x2_a16_generic(float* iBuffer } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the complex 16 bit vector into floats,scales each data point, and deinterleaves into I & Q vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h index 62331e496..993445995 100644 --- a/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_deinterleave_real_32f_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I float vector data @@ -52,7 +53,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffe } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 16 bit vector into I float vector data @@ -72,7 +73,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, __m128 invScalar = _mm_set_ps1(iScalar); int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; @@ -99,7 +100,7 @@ static inline void volk_16ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 16 bit vector into I float vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h index ae64efbeb..a136c0535 100644 --- a/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h +++ b/volk/include/volk/volk_16ic_s32f_magnitude_32f_a16.h @@ -1,11 +1,12 @@ #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H #define INCLUDED_volk_16ic_s32f_magnitude_32f_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -25,7 +26,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ @@ -70,7 +71,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse3(float* magnitudeVector, } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -91,7 +92,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, __m128 cplxValue1, cplxValue2, result, re, im; - float inputFloatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8]; for(;number < quarterPoints; number++){ inputFloatBuffer[0] = (float)(complexVectorPtr[0]); @@ -140,7 +141,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_sse(float* magnitudeVector, #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -161,7 +162,7 @@ static inline void volk_16ic_s32f_magnitude_32f_a16_generic(float* magnitudeVect } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC_DISABLED +#ifdef LV_HAVE_ORC_DISABLED /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values diff --git a/volk/include/volk/volk_16u_byteswap_a16.h b/volk/include/volk/volk_16u_byteswap_a16.h index c8128dbab..f393c05c5 100644 --- a/volk/include/volk/volk_16u_byteswap_a16.h +++ b/volk/include/volk/volk_16u_byteswap_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -43,7 +43,7 @@ static inline void volk_16u_byteswap_a16_sse2(uint16_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int16_t's. \param intsToSwap The vector of data to byte swap @@ -61,7 +61,7 @@ static inline void volk_16u_byteswap_a16_generic(uint16_t* intsToSwap, unsigned } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Byteswaps (in-place) an aligned vector of int16_t's. \param intsToSwap The vector of data to byte swap diff --git a/volk/include/volk/volk_32f_accumulator_s32f_a16.h b/volk/include/volk/volk_32f_accumulator_s32f_a16.h index 4a3588e6d..dd24a1e29 100644 --- a/volk/include/volk/volk_32f_accumulator_s32f_a16.h +++ b/volk/include/volk/volk_32f_accumulator_s32f_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32f_accumulator_s32f_a16_H #define INCLUDED_volk_32f_accumulator_s32f_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Accumulates the values in the input buffer @@ -18,7 +19,7 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float tempBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float tempBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); @@ -42,7 +43,7 @@ static inline void volk_32f_accumulator_s32f_a16_sse(float* result, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Accumulates the values in the input buffer \param result The accumulated result diff --git a/volk/include/volk/volk_32f_convert_64f_a16.h b/volk/include/volk/volk_32f_convert_64f_a16.h index c303dc118..8ca83220b 100644 --- a/volk/include/volk/volk_32f_convert_64f_a16.h +++ b/volk/include/volk/volk_32f_convert_64f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the float values into double values diff --git a/volk/include/volk/volk_32f_convert_64f_u.h b/volk/include/volk/volk_32f_convert_64f_u.h index a825767de..387baa3b9 100644 --- a/volk/include/volk/volk_32f_convert_64f_u.h +++ b/volk/include/volk/volk_32f_convert_64f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the float values into double values diff --git a/volk/include/volk/volk_32f_index_max_16u_a16.h b/volk/include/volk/volk_32f_index_max_16u_a16.h index d070e17d5..af1f35348 100644 --- a/volk/include/volk/volk_32f_index_max_16u_a16.h +++ b/volk/include/volk/volk_32f_index_max_16u_a16.h @@ -2,10 +2,11 @@ #define INCLUDED_volk_32f_index_max_16u_a16_H #include <volk/volk_common.h> +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include<smmintrin.h> static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { @@ -25,8 +26,8 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -63,7 +64,7 @@ static inline void volk_32f_index_max_16u_a16_sse4_1(unsigned int* target, const #endif /*LV_HAVE_SSE4_1*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include<xmmintrin.h> static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const float* src0, unsigned int num_points) { @@ -83,8 +84,8 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl __m128 compareResults; __m128 currentValues; - float maxValuesBuffer[4] __attribute__((aligned(16))); - float maxIndexesBuffer[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; for(;number < quarterPoints; number++){ @@ -122,7 +123,7 @@ static inline void volk_32f_index_max_16u_a16_sse(unsigned int* target, const fl #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32f_index_max_16u_a16_generic(unsigned int* 
target, const float* src0, unsigned int num_points) { if(num_points > 0){ float max = src0[0]; diff --git a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h index ff4d5b19c..6efd21a37 100644 --- a/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_32f_fm_detect_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector. @@ -78,7 +78,7 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_a16_sse(float* outputVector, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief performs the FM-detect differentiation on the input vector and stores the results in the output vector. \param outputVector The byte-aligned vector where the results will be stored. diff --git a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h index 168245d65..f5b388e6d 100644 --- a/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_calc_spectral_noise_floor_32f_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H #define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the spectral noise floor of an input power spectrum @@ -21,7 +22,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no const unsigned int quarterPoints = num_points / 4; const float* dataPointsPtr = realDataPoints; - float avgPointsVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4]; __m128 dataPointsVal; __m128 avgPointsVal = _mm_setzero_ps(); @@ -87,7 +88,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no sumMean += avgPointsVector[3]; // Calculate the number of valid bins from the remaning count - float validBinCountVector[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4]; _mm_store_ps(validBinCountVector, vValidBinCount); float validBinCount = 0; @@ -116,7 +117,7 @@ static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a16_sse(float* no } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Calculates the spectral noise floor of an input power spectrum diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h index d6b16e336..4acd2e13e 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32f_s32f_convert_16i_a16_H #define INCLUDED_volk_32f_s32f_convert_16i_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -44,7 +45,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse2(int16_t* outputVector, con } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -63,7 +64,7 @@ static inline void volk_32f_s32f_convert_16i_a16_sse(int16_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h index 4d306e53c..dec3f1611 100644 --- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -45,7 +45,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value @@ -65,7 +65,7 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h index ae874fd7b..3f5044313 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_a16.h @@ -1,10 +1,47 @@ #ifndef INCLUDED_volk_32f_s32f_convert_32i_a16_H #define INCLUDED_volk_32f_s32f_convert_32i_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_AVX +#include <immintrin.h> + /*! + \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value + \param inputVector The floating point input data buffer + \param outputVector The 32 bit output data buffer + \param scalar The value multiplied against each point in the input buffer + \param num_points The number of data values to be converted + */ +static inline void volk_32f_s32f_convert_32i_a16_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + + const unsigned int eighthPoints = num_points / 8; + + const float* inputVectorPtr = (const float*)inputVector; + int32_t* outputVectorPtr = outputVector; + __m256 vScalar = _mm256_set1_ps(scalar); + __m256 inputVal1; + __m256i intInputVal1; + + for(;number < eighthPoints; number++){ + inputVal1 = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8; /* unaligned load: an _a16 kernel is only promised 16-byte alignment, _mm256_load_ps requires 32 */ + + intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar)); /* NOTE(review): converts using the current rounding mode, while the scalar tail below truncates via the C cast - confirm this difference is intended */ + + 
_mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1); /* unaligned store, same 16-vs-32 byte alignment reason as the load */ + outputVectorPtr += 8; + } + + number = eighthPoints * 8; + for(; number < num_points; number++){ + outputVector[number] = (int32_t)(inputVector[number] * scalar); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -40,7 +77,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse2(int32_t* outputVector, con } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -59,7 +96,7 @@ static inline void volk_32f_s32f_convert_32i_a16_sse(int32_t* outputVector, cons __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h index 561fcd800..b4e954dc4 100644 --- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -41,7 +41,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value @@ -61,7 +61,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h index f64f2a213..c114ea38f 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_a16.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32f_s32f_convert_8i_a16_H #define INCLUDED_volk_32f_s32f_convert_8i_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -51,7 +52,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse2(int8_t* outputVector, const } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -70,7 +71,7 @@ static inline void volk_32f_s32f_convert_8i_a16_sse(int8_t* outputVector, const __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_load_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h index 420693571..1c6bf87c9 100644 --- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h +++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -52,7 +52,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value @@ -72,7 +72,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl __m128 vScalar = _mm_set_ps1(scalar); __m128 ret; - float outputFloatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; for(;number < quarterPoints; number++){ ret = _mm_loadu_ps(inputVectorPtr); diff --git a/volk/include/volk/volk_32f_s32f_normalize_a16.h b/volk/include/volk/volk_32f_s32f_normalize_a16.h index 0850cddf7..e6195cd32 100644 --- a/volk/include/volk/volk_32f_s32f_normalize_a16.h +++ b/volk/include/volk/volk_32f_s32f_normalize_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Normalizes all points in the buffer by the scalar value ( divides each data point by the scalar value ) @@ -41,7 +41,7 @@ static inline void volk_32f_s32f_normalize_a16_sse(float* vecBuffer, const float } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Normalizes the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -60,7 +60,7 @@ static inline void volk_32f_s32f_normalize_a16_generic(float* vecBuffer, const f } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Normalizes the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_s32f_power_32f_a16.h b/volk/include/volk/volk_32f_s32f_power_32f_a16.h index 3ed594d9a..ecff901e1 100644 --- a/volk/include/volk/volk_32f_s32f_power_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_power_32f_a16.h @@ -5,10 +5,10 @@ #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <tmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -26,7 +26,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa float* cPtr = cVector; const float* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 zeroValue = _mm_setzero_ps(); __m128 signMask; @@ -62,10 +62,10 @@ static inline void volk_32f_s32f_power_32f_a16_sse4_1(float* cVector, const floa } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -83,7 +83,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* float* cPtr = cVector; const float* aPtr = aVector; -#if 
LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 zeroValue = _mm_setzero_ps(); __m128 signMask; @@ -119,7 +119,7 @@ static inline void volk_32f_s32f_power_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Takes each the input vector value to the specified power and stores the results in the return vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h index 32f4fa067..c2b903657 100644 --- a/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h +++ b/volk/include/volk/volk_32f_s32f_stddev_32f_a16.h @@ -1,11 +1,12 @@ #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H #define INCLUDED_volk_32f_s32f_stddev_32f_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Calculates the standard deviation of the input buffer using the supplied mean @@ -22,7 +23,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal1, aVal2, aVal3, aVal4; @@ -65,7 +66,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const floa } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the standard deviation of the input buffer using the supplied mean @@ -82,7 +83,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* const float* aPtr = inputBuffer; - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 squareAccumulator = _mm_setzero_ps(); __m128 aVal = _mm_setzero_ps(); @@ -111,7 +112,7 @@ static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Calculates the standard deviation of the input buffer using the supplied mean \param stddev The calculated standard deviation diff --git a/volk/include/volk/volk_32f_sqrt_32f_a16.h b/volk/include/volk/volk_32f_sqrt_32f_a16.h index 513c2cffe..a9ce76f88 100644 --- a/volk/include/volk/volk_32f_sqrt_32f_a16.h +++ b/volk/include/volk/volk_32f_sqrt_32f_a16.h @@ -5,7 +5,7 @@ #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Sqrts the two input vectors and store their results in the third vector @@ -40,7 +40,7 @@ static inline void volk_32f_sqrt_32f_a16_sse(float* cVector, const float* aVecto } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Sqrts the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -58,7 +58,7 @@ static inline void volk_32f_sqrt_32f_a16_generic(float* cVector, const float* aV } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC extern void volk_32f_sqrt_32f_a16_orc_impl(float *, const float*, unsigned int); /*! 
\brief Sqrts the two input vectors and store their results in the third vector diff --git a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h index 278089841..10d72e09d 100644 --- a/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h +++ b/volk/include/volk/volk_32f_stddev_and_mean_32f_x2_a16.h @@ -1,11 +1,12 @@ #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Calculates the standard deviation and mean of the input buffer @@ -22,8 +23,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo const unsigned int sixteenthPoints = num_points / 16; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -78,7 +79,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, flo } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Calculates the standard deviation and mean of the input buffer @@ -95,8 +96,8 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* const unsigned int quarterPoints = num_points / 4; const float* aPtr = inputBuffer; - float meanBuffer[4] __attribute__((aligned(128))); - float squareBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; __m128 accumulator = _mm_setzero_ps(); __m128 squareAccumulator = _mm_setzero_ps(); @@ -134,7 +135,7 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Calculates the standard deviation and mean of the input buffer \param stddev The calculated standard deviation diff --git a/volk/include/volk/volk_32f_x2_add_32f_a16.h b/volk/include/volk/volk_32f_x2_add_32f_a16.h index d0d0e0a0e..2de6a6644 100644 --- a/volk/include/volk/volk_32f_x2_add_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_add_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Adds the two input vectors and store their results in the third vector @@ -43,7 +43,7 @@ static inline void volk_32f_x2_add_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Adds the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -63,7 +63,7 @@ static inline void volk_32f_x2_add_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Adds the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_x2_divide_32f_a16.h b/volk/include/volk/volk_32f_x2_divide_32f_a16.h index d844e25b0..1603e78de 100644 --- a/volk/include/volk/volk_32f_x2_divide_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_divide_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Divides the two input vectors and store their results in the third vector @@ -43,7 +43,7 @@ static inline void volk_32f_x2_divide_32f_a16_sse(float* cVector, const float* a } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Divides the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -63,7 +63,7 @@ static inline void volk_32f_x2_divide_32f_a16_generic(float* cVector, const floa } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Divides the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h index 61aa56815..2cd974070 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a16_H #define INCLUDED_volk_32f_x2_dot_prod_32f_a16_H +#include <volk/volk_common.h> #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const float * input, const float * taps, unsigned int num_points) { @@ -24,7 +25,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_generic(float * result, const fl #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float* input, const float* taps, unsigned int num_points) { @@ -53,7 +54,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -73,7 +74,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse( float* result, const float #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> @@ -102,7 +103,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * result, const float bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = _mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -120,7 +121,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse3(float * 
result, const float #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> @@ -163,7 +164,7 @@ static inline void volk_32f_x2_dot_prod_32f_a16_sse4_1(float * result, const flo dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; diff --git a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h index 8469a3cea..7f47122ff 100644 --- a/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h +++ b/volk/include/volk/volk_32f_x2_dot_prod_32f_u.h @@ -4,7 +4,7 @@ #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const float * input, const float * taps, unsigned int num_points) { @@ -24,7 +24,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_generic(float * result, const floa #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* input, const float* taps, unsigned int num_points) { @@ -53,7 +53,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -73,7 +73,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const float* #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> @@ -102,7 +102,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * bPtr += 4; } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; dotProdVal = 
_mm_hadd_ps(dotProdVal, dotProdVal); _mm_store_ps(dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -120,7 +120,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float * #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> @@ -163,7 +163,7 @@ static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const float dotProdVal = _mm_add_ps(dotProdVal, cVal1); } - float dotProductVector[4] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) float dotProductVector[4]; _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into the dot product vector dotProduct = dotProductVector[0]; diff --git a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h index 29c9392df..f3731fa2a 100644 --- a/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h +++ b/volk/include/volk/volk_32f_x2_interleave_32fc_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector @@ -48,7 +48,7 @@ static inline void volk_32f_x2_interleave_32fc_a16_sse(lv_32fc_t* complexVector, } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Interleaves the I & Q vector data into the complex vector. \param iBuffer The I buffer data to be interleaved diff --git a/volk/include/volk/volk_32f_x2_max_32f_a16.h b/volk/include/volk/volk_32f_x2_max_32f_a16.h index 26e7f1246..60be6e36d 100644 --- a/volk/include/volk/volk_32f_x2_max_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_max_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! 
\brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector @@ -45,7 +45,7 @@ static inline void volk_32f_x2_max_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -67,7 +67,7 @@ static inline void volk_32f_x2_max_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_x2_min_32f_a16.h b/volk/include/volk/volk_32f_x2_min_32f_a16.h index 23bae044c..3b8291531 100644 --- a/volk/include/volk/volk_32f_x2_min_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_min_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector @@ -45,7 +45,7 @@ static inline void volk_32f_x2_min_32f_a16_sse(float* cVector, const float* aVec } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -67,7 +67,7 @@ static inline void volk_32f_x2_min_32f_a16_generic(float* cVector, const float* } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h index a0dcfa86e..885941abf 100644 --- a/volk/include/volk/volk_32f_x2_multiply_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_multiply_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplys the two input vectors and store their results in the third vector @@ -43,7 +43,46 @@ static inline void volk_32f_x2_multiply_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_AVX +#include <immintrin.h> +/*! + \brief Multiplies the two input vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_32f_x2_multiply_32f_a16_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m256 aVal, bVal, cVal; + for(;number < eighthPoints; number++){ + + aVal = _mm256_loadu_ps(aPtr); /* unaligned load: an _a16 kernel is only promised 16-byte alignment, _mm256_load_ps requires 32 */ + bVal = _mm256_loadu_ps(bPtr); + + cVal = _mm256_mul_ps(aVal, bVal); + + _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container (unaligned store, same alignment reason as the loads) + + aPtr += 8; + bPtr += 8; + cPtr += 8; + } + + number = eighthPoints * 8; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_GENERIC /*! 
\brief Multiplys the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -63,7 +102,7 @@ static inline void volk_32f_x2_multiply_32f_a16_generic(float* cVector, const fl } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Multiplys the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h index 30306774d..f7ad3fd18 100644 --- a/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h +++ b/volk/include/volk/volk_32f_x2_s32f_interleave_16ic_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. @@ -62,7 +63,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse2(lv_16sc_t* complexV } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. @@ -85,7 +86,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe int16_t* complexVectorPtr = (int16_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ iValue = _mm_load_ps(iBufferPtr); @@ -127,7 +128,7 @@ static inline void volk_32f_x2_s32f_interleave_16ic_a16_sse(lv_16sc_t* complexVe } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. \param iBuffer The I buffer data to be interleaved diff --git a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h index 7404bfe79..c01f2c1f3 100644 --- a/volk/include/volk/volk_32f_x2_subtract_32f_a16.h +++ b/volk/include/volk/volk_32f_x2_subtract_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Subtracts bVector form aVector and store their results in the cVector @@ -43,7 +43,7 @@ static inline void volk_32f_x2_subtract_32f_a16_sse(float* cVector, const float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Subtracts bVector form aVector and store their results in the cVector \param cVector The vector where the results will be stored @@ -63,7 +63,7 @@ static inline void volk_32f_x2_subtract_32f_a16_generic(float* cVector, const fl } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Subtracts bVector form aVector and store their results in the cVector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h index af9e39537..6e446cbef 100644 --- a/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h +++ b/volk/include/volk/volk_32f_x3_sum_of_poly_32f_a16.h @@ -9,7 +9,7 @@ #define MAX(X,Y) ((X) > (Y)?(X):(Y)) #endif -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> @@ -98,7 +98,7 @@ static inline void volk_32f_x3_sum_of_poly_32f_a16_sse3(float* target, float* sr #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32f_x3_sum_of_poly_32f_a16_generic(float* target, float* src0, float* center_point_array, float* cutoff, unsigned int num_bytes) { diff --git a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h index 514998800..846315a4a 100644 --- a/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h +++ b/volk/include/volk/volk_32fc_32f_multiply_32fc_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Multiplies the input complex vector with the input float vector and store their results in the third vector @@ -56,7 +56,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_sse(lv_32fc_t* cVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector \param cVector The vector where the results will be stored @@ -76,7 +76,7 @@ static inline void volk_32fc_32f_multiply_32fc_a16_generic(lv_32fc_t* cVector, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h index 84d2576ed..3e7c3fa28 100644 --- a/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_32f_x2_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex vector into I & Q vector data @@ -49,7 +49,7 @@ static inline void volk_32fc_deinterleave_32f_x2_a16_sse(float* iBuffer, float* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I & Q vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h index 34262a7af..945a26742 100644 --- a/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_64f_x2_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data @@ -51,7 +51,7 @@ static inline void volk_32fc_deinterleave_64f_x2_a16_sse2(double* iBuffer, doubl } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the lv_32fc_t vector into double I & Q vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h index 9838ec88b..3c3fb2583 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex vector into I vector data @@ -44,7 +44,7 @@ static inline void volk_32fc_deinterleave_real_32f_a16_sse(float* iBuffer, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex vector into I vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h index af392d074..40c1a7a46 100644 --- a/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h +++ b/volk/include/volk/volk_32fc_deinterleave_real_64f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Deinterleaves the complex vector into I vector data @@ -42,7 +42,7 @@ static inline void volk_32fc_deinterleave_real_64f_a16_sse2(double* iBuffer, con } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex vector into I vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_32fc_index_max_16u_a16.h b/volk/include/volk/volk_32fc_index_max_16u_a16.h index 532ae4e7c..0ad1edbe9 100644 --- a/volk/include/volk/volk_32fc_index_max_16u_a16.h +++ b/volk/include/volk/volk_32fc_index_max_16u_a16.h @@ -6,7 +6,7 @@ #include<stdio.h> #include<volk/volk_complex.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> @@ -188,7 +188,7 @@ static inline void volk_32fc_index_max_16u_a16_sse3(unsigned int* target, lv_32f #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_index_max_16u_a16_generic(unsigned int* target, lv_32fc_t* src0, unsigned int num_bytes) { float sq_dist = 0.0; float max = 0.0; diff --git a/volk/include/volk/volk_32fc_magnitude_32f_a16.h b/volk/include/volk/volk_32fc_magnitude_32f_a16.h index be7216dce..946190e41 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_a16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_a16.h @@ -5,7 +5,7 @@ #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -48,7 +48,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse3(float* magnitudeVector, cons } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector @@ -96,7 +96,7 @@ static inline void volk_32fc_magnitude_32f_a16_sse(float* magnitudeVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -115,7 +115,7 @@ static inline void volk_32fc_magnitude_32f_a16_generic(float* magnitudeVector, c } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values diff --git a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h index e9f74438d..55b1b6c70 100644 --- a/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_atan2_32f_a16.h @@ -5,10 +5,10 @@ #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -27,7 +27,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con const unsigned int quarterPoints = num_points / 4; const float invNormalizeFactor = 1.0 / normalizeFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 testVector = _mm_set_ps1(2*M_PI); __m128 correctVector = _mm_set_ps1(M_PI); __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor); @@ -67,10 +67,10 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse4_1(float* outputVector, con #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -89,7 +89,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const const unsigned int quarterPoints = num_points / 4; const float invNormalizeFactor = 1.0 / normalizeFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 testVector = _mm_set_ps1(2*M_PI); 
__m128 correctVector = _mm_set_ps1(M_PI); __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor); @@ -131,7 +131,7 @@ static inline void volk_32fc_s32f_atan2_32f_a16_sse(float* outputVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief performs the atan2 on the input vector and stores the results in the output vector. \param outputVector The vector where the results will be stored. diff --git a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h index 31465bff9..2460039d2 100644 --- a/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_deinterleave_real_16i_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex vector, multiply the value by the scalar, convert to 16t, and in I vector data @@ -24,7 +25,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer __m128 cplxValue1, cplxValue2, iValue; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -54,7 +55,7 @@ static inline void volk_32fc_s32f_deinterleave_real_16i_a16_sse(int16_t* iBuffer } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex vector, multiply the value by the scalar, convert to 16t, and in I vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h index 530359600..f67ab0607 100644 --- a/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h +++ b/volk/include/volk/volk_32fc_s32f_magnitude_16i_a16.h @@ -1,11 +1,12 @@ #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H #define INCLUDED_volk_32fc_s32f_magnitude_16i_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector @@ -25,7 +26,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto __m128 cplxValue1, cplxValue2, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -60,7 +61,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse3(int16_t* magnitudeVecto } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector @@ -80,7 +81,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector __m128 cplxValue1, cplxValue2, iValue, qValue, result; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ cplxValue1 = _mm_load_ps(complexVectorPtr); @@ -120,7 +121,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_sse(int16_t* magnitudeVector } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values @@ -140,7 +141,7 @@ static inline void volk_32fc_s32f_magnitude_16i_a16_generic(int16_t* magnitudeVe } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector \param complexVector The vector containing the complex input values diff --git a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h index 3507fdb3c..155b93ca2 100644 --- a/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h +++ b/volk/include/volk/volk_32fc_s32f_power_32fc_a16.h @@ -3,11 +3,19 @@ #include <inttypes.h> #include <stdio.h> +#include <math.h> -#if LV_HAVE_SSE +//! raise a complex float to a real float power +static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a16(const lv_32fc_t exp, const float power){ + const float arg = power*atan2f(lv_creal(exp), lv_cimag(exp)); + const float mag = powf(lv_creal(exp)*lv_creal(exp) + lv_cimag(exp)*lv_cimag(exp), power/2); + return mag*lv_cmake(cosf(arg), sinf(arg)); +} + +#ifdef LV_HAVE_SSE #include <xmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -25,7 +33,7 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 vPower = _mm_set_ps1(power); __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue; @@ -72,16 +80,13 @@ static inline void volk_32fc_s32f_power_32fc_a16_sse(lv_32fc_t* cVector, const l number = quarterPoints * 4; #endif /* LV_HAVE_LIB_SIMDMATH */ - lv_32fc_t complexPower; - ((float*)&complexPower)[0] = power; - 
((float*)&complexPower)[1] = 0; for(;number < num_points; number++){ - *cPtr++ = lv_cpow((*aPtr++), complexPower); + *cPtr++ = __volk_s32fc_s32f_power_s32fc_a16((*aPtr++), power); } } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Takes each the input complex vector value to the specified power and stores the results in the return vector \param cVector The vector where the results will be stored @@ -93,12 +98,9 @@ static inline void volk_32fc_s32f_power_32fc_a16_generic(lv_32fc_t* cVector, con lv_32fc_t* cPtr = cVector; const lv_32fc_t* aPtr = aVector; unsigned int number = 0; - lv_32fc_t complexPower; - ((float*)&complexPower)[0] = power; - ((float*)&complexPower)[1] = 0.0; for(number = 0; number < num_points; number++){ - *cPtr++ = lv_cpow((*aPtr++), complexPower); + *cPtr++ = __volk_s32fc_s32f_power_s32fc_a16((*aPtr++), power); } } #endif /* LV_HAVE_GENERIC */ diff --git a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h index 39d8f7aa2..03da069c2 100644 --- a/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_power_spectrum_32f_a16.h @@ -5,10 +5,10 @@ #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -24,7 +24,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut float* destPtr = logPowerOutput; uint64_t number = 0; const float iNormalizationFactor = 1.0 / normalizationFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 magScalar = _mm_set_ps1(10.0); magScalar = _mm_div_ps(magScalar, logf4(magScalar)); @@ -88,7 +88,7 @@ static inline void volk_32fc_s32f_power_spectrum_32f_a16_sse3(float* logPowerOut } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the log10 power value for each input point \param logPowerOutput The 10.0 * log10(r*r + i*i) for each data point diff --git a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h index 0120b5307..5bcd7f7c4 100644 --- a/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h +++ b/volk/include/volk/volk_32fc_s32f_x2_power_spectral_density_32f_a16.h @@ -5,10 +5,10 @@ #include <stdio.h> #include <math.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH #include <simdmath.h> #endif /* LV_HAVE_LIB_SIMDMATH */ @@ -27,7 +27,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* const float iRBW = 1.0 / rbw; const float iNormalizationFactor = 1.0 / normalizationFactor; -#if LV_HAVE_LIB_SIMDMATH +#ifdef LV_HAVE_LIB_SIMDMATH __m128 magScalar = _mm_set_ps1(10.0); magScalar = _mm_div_ps(magScalar, logf4(magScalar)); @@ -94,7 +94,7 @@ static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a16_sse3(float* } #endif /* LV_HAVE_SSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Calculates the log10 power value divided by the RBW for each input point \param logPowerOutput The 10.0 * log10((r*r + i*i)/RBW) for each data point diff --git a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h index a01971df3..f221237ff 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_a16.h @@ -1,11 +1,12 @@ #ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H #define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a16_H +#include <volk/volk_common.h> #include<volk/volk_complex.h> #include<stdio.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { @@ -64,7 +65,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_generic(lv_32fc_t* r static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; @@ -205,7 +206,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse(lv_32fc_t* resul #if LV_HAVE_SSE && LV_HAVE_32 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; int bound = num_bytes >> 4; int leftovers = num_bytes % 16; diff --git 
a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h index 2fa5918cc..6b22d9f81 100644 --- a/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h +++ b/volk/include/volk/volk_32fc_x2_conjugate_dot_prod_32fc_u.h @@ -5,7 +5,7 @@ #include<volk/volk_complex.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { @@ -57,7 +57,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <xmmintrin.h> #include <pmmintrin.h> @@ -66,7 +66,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_generic(lv_32fc_t* res static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { - static const uint32_t conjugator[4] __attribute__((aligned(16)))= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; + __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 0x80000000, 0x00000000, 0x80000000}; union HalfMask { uint32_t intRep[4]; @@ -131,7 +131,7 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* result Isum += Im; } - result[0] = lv_32fc_init(Rsum,Isum); + result[0] = lv_cmake(Rsum,Isum); return; } diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h index 9a7b65ab4..9657c8f6b 100644 --- a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_a16.h @@ -1,12 +1,13 @@ #ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H #define INCLUDED_volk_32fc_x2_dot_prod_32fc_a16_H +#include <volk/volk_common.h> #include <volk/volk_complex.h> #include <stdio.h> #include <string.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC 
static inline void volk_32fc_x2_dot_prod_32fc_a16_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_bytes) { @@ -316,7 +317,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse_32(lv_32fc_t* result, cons #endif /*LV_HAVE_SSE*/ -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> @@ -358,7 +359,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const b += 2; } - lv_32fc_t dotProductVector[2] __attribute__((aligned(16))); + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2]; _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector @@ -373,7 +374,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a16_sse3(lv_32fc_t* result, const #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h index b4214f5d2..72010b855 100644 --- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h +++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a16.h @@ -6,7 +6,7 @@ #include <volk/volk_complex.h> #include <float.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include <pmmintrin.h> /*! \brief Multiplies the two input complex vectors and stores their results in the third vector @@ -53,7 +53,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_sse3(lv_32fc_t* cVector, const } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplies the two input complex vectors and stores their results in the third vector \param cVector The vector where the results will be stored @@ -73,7 +73,7 @@ static inline void volk_32fc_x2_multiply_32fc_a16_generic(lv_32fc_t* cVector, co } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Multiplies the two input complex vectors and stores their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h index 6a863b16d..910f51679 100644 --- a/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h +++ b/volk/include/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16.h @@ -6,7 +6,7 @@ #include<volk/volk_complex.h> #include <string.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> @@ -105,7 +105,7 @@ static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_sse3(float* #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; diff --git a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h index 406097fc8..551f3cb53 100644 --- a/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h +++ b/volk/include/volk/volk_32fc_x2_square_dist_32f_a16.h @@ -5,7 +5,7 @@ #include<stdio.h> #include<volk/volk_complex.h> -#if LV_HAVE_SSE3 +#ifdef LV_HAVE_SSE3 #include<xmmintrin.h> #include<pmmintrin.h> @@ -91,7 +91,7 @@ static inline void volk_32fc_x2_square_dist_32f_a16_sse3(float* target, lv_32fc_ #endif /*LV_HAVE_SSE3*/ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32fc_x2_square_dist_32f_a16_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_bytes) { lv_32fc_t diff; float sq_dist; diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h index 0fcadd9cb..b744c7197 100644 --- a/volk/include/volk/volk_32i_s32f_convert_32f_a16.h +++ 
b/volk/include/volk/volk_32i_s32f_convert_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -47,7 +47,7 @@ static inline void volk_32i_s32f_convert_32f_a16_sse2(float* outputVector, const #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 32 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 32 bit input data buffer diff --git a/volk/include/volk/volk_32i_s32f_convert_32f_u.h b/volk/include/volk/volk_32i_s32f_convert_32f_u.h index 1dd6422f8..d8afd218c 100644 --- a/volk/include/volk/volk_32i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_32i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -48,7 +48,7 @@ static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const i #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 32 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 32 bit input data buffer diff --git a/volk/include/volk/volk_32i_x2_and_32i_a16.h b/volk/include/volk/volk_32i_x2_and_32i_a16.h index 3baa1d856..4d50efd32 100644 --- a/volk/include/volk/volk_32i_x2_and_32i_a16.h +++ b/volk/include/volk/volk_32i_x2_and_32i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Ands the two input vectors and store their results in the third vector @@ -43,7 +43,7 @@ static inline void volk_32i_x2_and_32i_a16_sse(int32_t* cVector, const int32_t* } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Ands the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -63,7 +63,7 @@ static inline void volk_32i_x2_and_32i_a16_generic(int32_t* cVector, const int32 } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Ands the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32i_x2_or_32i_a16.h b/volk/include/volk/volk_32i_x2_or_32i_a16.h index 0be22f00a..9edbdbafd 100644 --- a/volk/include/volk/volk_32i_x2_or_32i_a16.h +++ b/volk/include/volk/volk_32i_x2_or_32i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Ors the two input vectors and store their results in the third vector @@ -43,7 +43,7 @@ static inline void volk_32i_x2_or_32i_a16_sse(int32_t* cVector, const int32_t* a } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Ors the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored @@ -63,7 +63,7 @@ static inline void volk_32i_x2_or_32i_a16_generic(int32_t* cVector, const int32_ } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Ors the two input vectors and store their results in the third vector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_32u_byteswap_a16.h b/volk/include/volk/volk_32u_byteswap_a16.h index 7556ec7b1..dc5cedab9 100644 --- a/volk/include/volk/volk_32u_byteswap_a16.h +++ b/volk/include/volk/volk_32u_byteswap_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
@@ -51,7 +51,7 @@ static inline void volk_32u_byteswap_a16_sse2(uint32_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int32_t's. \param intsToSwap The vector of data to byte swap diff --git a/volk/include/volk/volk_32u_popcnt_a16.h b/volk/include/volk/volk_32u_popcnt_a16.h index f6e25e4e8..0d8b48fd5 100644 --- a/volk/include/volk/volk_32u_popcnt_a16.h +++ b/volk/include/volk/volk_32u_popcnt_a16.h @@ -5,7 +5,7 @@ #include <inttypes.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t value) { @@ -23,7 +23,7 @@ static inline void volk_32u_popcnt_a16_generic(uint32_t* ret, const uint32_t val #endif /*LV_HAVE_GENERIC*/ -#if LV_HAVE_SSE4_2 +#ifdef LV_HAVE_SSE4_2 #include <nmmintrin.h> diff --git a/volk/include/volk/volk_64f_convert_32f_a16.h b/volk/include/volk/volk_64f_convert_32f_a16.h index 7dca065f0..cfcdbdc3a 100644 --- a/volk/include/volk/volk_64f_convert_32f_a16.h +++ b/volk/include/volk/volk_64f_convert_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Converts the double values into float values diff --git a/volk/include/volk/volk_64f_convert_32f_u.h b/volk/include/volk/volk_64f_convert_32f_u.h index 6338c1433..5c323230a 100644 --- a/volk/include/volk/volk_64f_convert_32f_u.h +++ b/volk/include/volk/volk_64f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! 
\brief Converts the double values into float values diff --git a/volk/include/volk/volk_64f_x2_max_64f_a16.h b/volk/include/volk/volk_64f_x2_max_64f_a16.h index 4b0c1f5f1..21f488bf7 100644 --- a/volk/include/volk/volk_64f_x2_max_64f_a16.h +++ b/volk/include/volk/volk_64f_x2_max_64f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector @@ -45,7 +45,7 @@ static inline void volk_64f_x2_max_64f_a16_sse2(double* cVector, const double* a } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Selects maximum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_64f_x2_min_64f_a16.h b/volk/include/volk/volk_64f_x2_min_64f_a16.h index aa961e384..8711a0eae 100644 --- a/volk/include/volk/volk_64f_x2_min_64f_a16.h +++ b/volk/include/volk/volk_64f_x2_min_64f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! \brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector @@ -45,7 +45,7 @@ static inline void volk_64f_x2_min_64f_a16_sse2(double* cVector, const double* a } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Selects minimum value from each entry between bVector and aVector and store their results in the cVector \param cVector The vector where the results will be stored diff --git a/volk/include/volk/volk_64u_byteswap_a16.h b/volk/include/volk/volk_64u_byteswap_a16.h index 0eefe0138..b4bed8451 100644 --- a/volk/include/volk/volk_64u_byteswap_a16.h +++ b/volk/include/volk/volk_64u_byteswap_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE2 +#ifdef LV_HAVE_SSE2 #include <emmintrin.h> /*! @@ -59,7 +59,7 @@ static inline void volk_64u_byteswap_a16_sse2(uint64_t* intsToSwap, unsigned int } #endif /* LV_HAVE_SSE2 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Byteswaps (in-place) an aligned vector of int64_t's. \param intsToSwap The vector of data to byte swap diff --git a/volk/include/volk/volk_64u_popcnt_a16.h b/volk/include/volk/volk_64u_popcnt_a16.h index 59511dc29..8b92e91a1 100644 --- a/volk/include/volk/volk_64u_popcnt_a16.h +++ b/volk/include/volk/volk_64u_popcnt_a16.h @@ -5,7 +5,7 @@ #include <inttypes.h> -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC static inline void volk_64u_popcnt_a16_generic(uint64_t* ret, const uint64_t value) { diff --git a/volk/include/volk/volk_8i_convert_16i_a16.h b/volk/include/volk/volk_8i_convert_16i_a16.h index 3d7045753..260ac40a1 100644 --- a/volk/include/volk/volk_8i_convert_16i_a16.h +++ b/volk/include/volk/volk_8i_convert_16i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -47,7 +47,7 @@ static inline void volk_8i_convert_16i_a16_sse4_1(int16_t* outputVector, const i } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer @@ -65,7 +65,7 @@ static inline void volk_8i_convert_16i_a16_generic(int16_t* outputVector, const } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! \brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8i_convert_16i_u.h b/volk/include/volk/volk_8i_convert_16i_u.h index bcff13406..7d7104f52 100644 --- a/volk/include/volk/volk_8i_convert_16i_u.h +++ b/volk/include/volk/volk_8i_convert_16i_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -48,7 +48,7 @@ static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 8 bit integer data into 16 bit integer data \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h index 99a24ec10..9991b150e 100644 --- a/volk/include/volk/volk_8i_s32f_convert_32f_a16.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -66,7 +66,7 @@ static inline void volk_8i_s32f_convert_32f_a16_sse4_1(float* outputVector, cons } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer @@ -86,7 +86,7 @@ static inline void volk_8i_s32f_convert_32f_a16_generic(float* outputVector, con } #endif /* LV_HAVE_GENERIC */ -#if LV_HAVE_ORC +#ifdef LV_HAVE_ORC /*! 
\brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8i_s32f_convert_32f_u.h b/volk/include/volk/volk_8i_s32f_convert_32f_u.h index 1e30957e8..3cd6bb67c 100644 --- a/volk/include/volk/volk_8i_s32f_convert_32f_u.h +++ b/volk/include/volk/volk_8i_s32f_convert_32f_u.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! @@ -67,7 +67,7 @@ static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value \param inputVector The 8 bit input data buffer diff --git a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h index 91c9b2c58..249acab49 100644 --- a/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_16i_x2_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data @@ -51,7 +51,7 @@ static inline void volk_8ic_deinterleave_16i_x2_a16_sse4_1(int16_t* iBuffer, int } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h index bf3dc20dd..7b64b37c5 100644 --- a/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_16i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data @@ -42,7 +42,7 @@ static inline void volk_8ic_deinterleave_real_16i_a16_sse4_1(int16_t* iBuffer, c #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h index 13de79423..a1abad487 100644 --- a/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h +++ b/volk/include/volk/volk_8ic_deinterleave_real_8i_a16.h @@ -4,7 +4,7 @@ #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSSE3 +#ifdef LV_HAVE_SSSE3 #include <tmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I vector data @@ -43,7 +43,7 @@ static inline void volk_8ic_deinterleave_real_8i_a16_ssse3(int8_t* iBuffer, cons } #endif /* LV_HAVE_SSSE3 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 8 bit vector into I vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h index 22c3ebb23..7d778796e 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_32f_x2_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data @@ -74,7 +75,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse4_1(float* iBuffer, } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data @@ -95,7 +96,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo __m128 invScalar = _mm_set_ps1(1.0/scalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[8] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[8]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(complexVectorPtr[0]); @@ -136,7 +137,7 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_a16_sse(float* iBuffer, flo } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 8 bit vector into I & Q floating point vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h index 5f1430394..a2e0cd8de 100644 --- a/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h +++ b/volk/include/volk/volk_8ic_s32f_deinterleave_real_32f_a16.h @@ -1,10 +1,11 @@ #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H +#include <volk/volk_common.h> #include <inttypes.h> #include <stdio.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I float vector data @@ -61,7 +62,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_SSE +#ifdef LV_HAVE_SSE #include <xmmintrin.h> /*! \brief Deinterleaves the complex 8 bit vector into I float vector data @@ -81,7 +82,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c __m128 invScalar = _mm_set_ps1(iScalar); int8_t* complexVectorPtr = (int8_t*)complexVector; - float floatBuffer[4] __attribute__((aligned(128))); + __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; for(;number < quarterPoints; number++){ floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; @@ -107,7 +108,7 @@ static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, c } #endif /* LV_HAVE_SSE */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Deinterleaves the complex 8 bit vector into I float vector data \param complexVector The complex input vector diff --git a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h index d9cacbf46..7307ae484 100644 --- a/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h +++ b/volk/include/volk/volk_8ic_x2_multiply_conjugate_16ic_a16.h @@ -5,7 +5,7 @@ #include <stdio.h> #include <volk/volk_complex.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector @@ -23,7 +23,6 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe const lv_8sc_t* a = aVector; const lv_8sc_t* b = bVector; __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); - const int shuffleMask = _MM_SHUFFLE(2,3,0,1); for(;number < quarterPoints; number++){ // Convert into 8 bit values into 16 bit values @@ -37,7 +36,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe y = _mm_sign_epi16(y, conjugateSign); // Shift the order of the cr and ci values - y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); + y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); // Calculate the ar*(-ci) + cr*(ai) imagz = _mm_madd_epi16(x,y); @@ -56,10 +55,10 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe for(; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *c16Ptr++ = (int16_t)lv_creal(temp); @@ 
-68,7 +67,7 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVe } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector \param cVector The complex vector where the results will be stored @@ -84,10 +83,10 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cV for(number =0; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *c16Ptr++ = (int16_t)lv_creal(temp); diff --git a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h index 6ec923a4f..adc7c0599 100644 --- a/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h +++ b/volk/include/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc_a16.h @@ -5,7 +5,7 @@ #include <stdio.h> #include <volk/volk_complex.h> -#if LV_HAVE_SSE4_1 +#ifdef LV_HAVE_SSE4_1 #include <smmintrin.h> /*! 
\brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector @@ -24,7 +24,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t const lv_8sc_t* a = aVector; const lv_8sc_t* b = bVector; __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); - const int shuffleMask = _MM_SHUFFLE(2,3,0,1); + __m128 invScalar = _mm_set_ps1(1.0/scalar); for(;number < quarterPoints; number++){ @@ -39,7 +39,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t y = _mm_sign_epi16(y, conjugateSign); // Shift the order of the cr and ci values - y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); + y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1)); // Calculate the ar*(-ci) + cr*(ai) imagz = _mm_madd_epi16(x,y); @@ -75,10 +75,10 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t for(; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *cFloatPtr++ = lv_creal(temp) / scalar; @@ -87,7 +87,7 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_sse4_1(lv_32fc_t } #endif /* LV_HAVE_SSE4_1 */ -#if LV_HAVE_GENERIC +#ifdef LV_HAVE_GENERIC /*! 
\brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector \param cVector The complex vector where the results will be stored @@ -104,10 +104,10 @@ static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a16_generic(lv_32fc_ for(number = 0; number < num_points; number++){ float aReal = (float)*a8Ptr++; float aImag = (float)*a8Ptr++; - lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); + lv_32fc_t aVal = lv_cmake(aReal, aImag ); float bReal = (float)*b8Ptr++; float bImag = (float)*b8Ptr++; - lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); + lv_32fc_t bVal = lv_cmake( bReal, -bImag ); lv_32fc_t temp = aVal * bVal; *cPtr++ = (lv_creal(temp) * invScalar); diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h index 6f444ad89..2c935d1fb 100644 --- a/volk/include/volk/volk_common.h +++ b/volk/include/volk/volk_common.h @@ -1,18 +1,94 @@ -#ifndef INCLUDED_LIBVECTOR_COMMON_H -#define INCLUDED_LIBVECTOR_COMMON_H +#ifndef INCLUDED_LIBVOLK_COMMON_H +#define INCLUDED_LIBVOLK_COMMON_H + +//////////////////////////////////////////////////////////////////////// +// Cross-platform attribute macros +//////////////////////////////////////////////////////////////////////// +#if defined __GNUC__ +# define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x))) +# define __VOLK_ATTR_UNUSED __attribute__((unused)) +# define __VOLK_ATTR_INLINE __attribute__((always_inline)) +# define __VOLK_ATTR_DEPRECATED __attribute__((deprecated)) +# if __GNUC__ >= 4 +# define __VOLK_ATTR_EXPORT __attribute__((visibility("default"))) +# define __VOLK_ATTR_IMPORT __attribute__((visibility("default"))) +# else +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +# endif +#elif _MSC_VER +# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x)) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE __forceinline +# define __VOLK_ATTR_DEPRECATED __declspec(deprecated) +# define __VOLK_ATTR_EXPORT 
__declspec(dllexport) +# define __VOLK_ATTR_IMPORT __declspec(dllimport) +#else +# define __VOLK_ATTR_ALIGNED(x) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE +# define __VOLK_ATTR_DEPRECATED +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// Ignore annoying warnings in MSVC +//////////////////////////////////////////////////////////////////////// +#if defined(_MSC_VER) +# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data +# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2' +#endif + +//////////////////////////////////////////////////////////////////////// +// C-linkage declaration macros +// FIXME: due to the usage of complex.h, require gcc for c-linkage +//////////////////////////////////////////////////////////////////////// +#if defined(__cplusplus) && (__GNUC__) +# define __VOLK_DECL_BEGIN extern "C" { +# define __VOLK_DECL_END } +#else +# define __VOLK_DECL_BEGIN +# define __VOLK_DECL_END +#endif + +//////////////////////////////////////////////////////////////////////// +// Define VOLK_API for library symbols +// http://gcc.gnu.org/wiki/Visibility +//////////////////////////////////////////////////////////////////////// +#ifdef volk_EXPORTS +# define VOLK_API __VOLK_ATTR_EXPORT +#else +# define VOLK_API __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// The bit128 union used by some +//////////////////////////////////////////////////////////////////////// +#include <inttypes.h> + +#ifdef LV_HAVE_SSE +#include <xmmintrin.h> +#endif + +#ifdef LV_HAVE_SSE2 +#include <emmintrin.h> +#endif -#include<inttypes.h> -#if LV_HAVE_MMX -#include<xmmintrin.h> union bit128{ uint16_t i16[8]; uint32_t i[4]; float f[4]; double d[2]; - __m128i int_vec; + + #ifdef LV_HAVE_SSE __m128 float_vec; + #endif + + #ifdef LV_HAVE_SSE2 + 
__m128i int_vec; __m128d double_vec; + #endif }; -#endif /*LV_HAVE_MMX*/ -#endif /*INCLUDED_LIBVECTOR_COMMON_H*/ +#endif /*INCLUDED_LIBVOLK_COMMON_H*/ diff --git a/volk/include/volk/volk_complex.h b/volk/include/volk/volk_complex.h index b20b5cf47..5bd925044 100644 --- a/volk/include/volk/volk_complex.h +++ b/volk/include/volk/volk_complex.h @@ -2,8 +2,21 @@ #define INCLUDE_VOLK_COMPLEX_H /*! - \brief This header file is to prevent issues with having <complex> and <complex.h> variables in the same code as the gcc compiler does not allow that -*/ + * \brief Provide typedefs and operators for all complex types in C and C++. + * + * The typedefs encompass all signed integer and floating point types. + * Each operator function is intended to work across all data types. + * Under C++, these operators are defined as inline templates. + * Under C, these operators are defined as preprocessor macros. + * The use of macros makes the operators agnostic to the type. + * + * The following operator functions are defined: + * - lv_cmake - make a complex type from components + * - lv_creal - get the real part of the complex number + * - lv_cimag - get the imaginary part of the complex number + * - lv_conj - take the conjugate of the complex number + */ + #ifdef __cplusplus #include <complex> @@ -12,60 +25,62 @@ typedef std::complex<int8_t> lv_8sc_t; typedef std::complex<int16_t> lv_16sc_t; typedef std::complex<int32_t> lv_32sc_t; +typedef std::complex<int64_t> lv_64sc_t; typedef std::complex<float> lv_32fc_t; typedef std::complex<double> lv_64fc_t; -static inline float lv_creal(const lv_32fc_t x){ - return x.real(); -} - -static inline float lv_cimag(const lv_32fc_t x){ - return x.imag(); +template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){ + return std::complex<T>(r, i); } -static inline lv_32fc_t lv_conj(const lv_32fc_t x){ - return std::conj(x); +template <typename T> inline typename T::value_type lv_creal(const T &x){ + return x.real(); } -static 
inline lv_32fc_t lv_cpow(const lv_32fc_t x, const lv_32fc_t y){ - return std::pow(x, y); +template <typename T> inline typename T::value_type lv_cimag(const T &x){ + return x.imag(); } -static inline lv_32fc_t lv_32fc_init(const float x, const float y){ - return std::complex<float>(x,y); +template <typename T> inline T lv_conj(const T &x){ + return std::conj(x); } -#else +#else /* __cplusplus */ #include <complex.h> typedef char complex lv_8sc_t; typedef short complex lv_16sc_t; -typedef int complex lv_32sc_t; +typedef long complex lv_32sc_t; +typedef long long complex lv_64sc_t; typedef float complex lv_32fc_t; typedef double complex lv_64fc_t; -static inline float lv_creal(const lv_32fc_t x){ - return creal(x); -} +#define lv_cmake(r, i) ((r) + _Complex_I*(i)) -static inline float lv_cimag(const lv_32fc_t x){ - return cimag(x); -} +// When GNUC is available, use the complex extensions. +// The extensions always return the correct value type. +// http://gcc.gnu.org/onlinedocs/gcc/Complex.html +#ifdef __GNUC__ -static inline lv_32fc_t lv_conj(const lv_32fc_t x){ - return conj(x); -} +#define lv_creal(x) (__real__(x)) -static inline lv_32fc_t lv_cpow(const lv_32fc_t x, const lv_32fc_t y){ - return cpow(x, y); -} +#define lv_cimag(x) (__imag__(x)) -static inline lv_32fc_t lv_32fc_init(const float x, const float y){ - return x + I*y; -} +#define lv_conj(x) (~(x)) + +// When not available, use the c99 complex function family, +// which always returns double regardless of the input type. 
+#else /* __GNUC__ */ + +#define lv_creal(x) (creal(x)) + +#define lv_cimag(x) (cimag(x)) + +#define lv_conj(x) (conj(x)) -#endif +#endif /* __GNUC__ */ +#endif /* __cplusplus */ #endif /* INCLUDE_VOLK_COMPLEX_H */ diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h new file mode 100644 index 000000000..2a7f7e79f --- /dev/null +++ b/volk/include/volk/volk_prefs.h @@ -0,0 +1,25 @@ +#ifndef INCLUDED_VOLK_PREFS_H +#define INCLUDED_VOLK_PREFS_H + +#include <volk/volk_common.h> + +__VOLK_DECL_BEGIN + +struct VOLK_API volk_arch_pref { + char name[128]; + char arch[32]; +}; + +//////////////////////////////////////////////////////////////////////// +// get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +VOLK_API void get_config_path(char *); + +//////////////////////////////////////////////////////////////////////// +// load prefs into global prefs struct +//////////////////////////////////////////////////////////////////////// +VOLK_API int load_preferences(struct volk_arch_pref **); + +__VOLK_DECL_END + +#endif //INCLUDED_VOLK_PREFS_H diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index 6a5fde28f..28ec6ddaa 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -1,23 +1,4 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in -/volk.c -/volk_cpu_generic.c -/volk_cpu_powerpc.c -/volk_cpu_x86.c -/volk_environment_init.c -/volk_init.c -/volk_init.h -/volk_mktables -/volk_mktables.c -/volk_proccpu_sim.c -/volk_runtime.c -/test_all +/Makefile.am /testqa diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt new file mode 100644 index 000000000..5dd41be0f --- /dev/null +++ b/volk/lib/CMakeLists.txt @@ -0,0 +1,260 @@ +# +# Copyright 2011 Free Software Foundation, Inc. 
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# Parse the arches xml file:
+# Test each arch to see if the compiler supports the flag.
+# If the test passes append the arch to the available list.
+########################################################################
+#extract the arch lines from the xml file using crazy python
+#The script prints "<name> <flag>" entries joined by ';' so that CMake
+#reads the captured output as a list with one entry per arch.
+#print(...) is written with parentheses so the one-liner runs unchanged
+#under both Python 2 and Python 3.
+EXECUTE_PROCESS(
+    COMMAND ${PYTHON_EXECUTABLE} -c
+    "from xml.dom import minidom; print(';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch'))))"
+    OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE
+    RESULT_VARIABLE arch_lines_result
+)
+
+#stop here instead of silently continuing with an empty arch list
+IF(NOT "${arch_lines_result}" STREQUAL "0")
+    MESSAGE(FATAL_ERROR "Failed to extract arches from ${CMAKE_SOURCE_DIR}/gen/archs.xml (${arch_lines_result})")
+ENDIF()
+
+#This macro sets the ${arch}_flag variable,
+#and handles special cases for MSVC arch flags.
+MACRO(set_arch_flag name flag)
+    #name and flag are quoted in the comparisons so an empty value cannot
+    #break the IF() syntax and the text is compared as a string
+    IF(MSVC AND "${name}" STREQUAL "mmx")
+        SET(${name}_flag "/arch:SSE") #no /arch:MMX
+    ELSEIF(MSVC AND "${name}" STREQUAL "sse")
+        SET(${name}_flag "/arch:SSE")
+    ELSEIF(MSVC AND "${name}" STREQUAL "sse2")
+        SET(${name}_flag "/arch:SSE2")
+    ELSE()
+        #every other case: the xml flag is passed through with a leading dash
+        SET(${name}_flag -${flag})
+    ENDIF()
+ENDMACRO(set_arch_flag)
+
+#CHECK_CXX_COMPILER_FLAG is used by handle_arch below;
+#load the module once here rather than on every macro expansion
+INCLUDE(CheckCXXCompilerFlag)
+
+#handle_arch(<name> <flag>)
+#Sets have_<name>, and appends <name> to available_arches when it is true.
+#This is a macro, so every variable it sets lands in the caller's scope.
+MACRO(handle_arch name flag)
+
+    #handle special case for none flag
+    IF("${flag}" STREQUAL "none")
+        SET(have_${name} TRUE)
+
+    #otherwise test the flag against the compiler
+    ELSE()
+        set_arch_flag(${name} ${flag})
+        CHECK_CXX_COMPILER_FLAG("${${name}_flag}" have_${name})
+    ENDIF()
+
+    IF(have_${name})
+        LIST(APPEND available_arches ${name})
+    ENDIF()
+ENDMACRO(handle_arch)
+
+#create a list of available arches
+#each arch_line is "<name> <flag>"; split it into the two macro arguments
+FOREACH(arch_line ${arch_lines})
+    SEPARATE_ARGUMENTS(args UNIX_COMMAND "${arch_line}")
+    handle_arch(${args})
+ENDFOREACH(arch_line)
+
+MESSAGE(STATUS "Available arches: ${available_arches}")
+
+########################################################################
+# Parse the machines xml file:
+# Test each machine to see if its arch dependencies are supported.
+# Build a list of supported machines and the machine definitions.
+########################################################################
+#extract the machine lines from the xml file using crazy python
+#The script prints "<name> <archs...>" entries joined by ';' so that CMake
+#reads the captured output as a list with one entry per machine.
+#print(...) is written with parentheses so the one-liner runs unchanged
+#under both Python 2 and Python 3.
+EXECUTE_PROCESS(
+    COMMAND ${PYTHON_EXECUTABLE} -c
+    "from xml.dom import minidom; print(';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine'))))"
+    OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE
+    RESULT_VARIABLE machine_lines_result
+)
+
+#stop here instead of silently continuing with an empty machine list
+IF(NOT "${machine_lines_result}" STREQUAL "0")
+    MESSAGE(FATAL_ERROR "Failed to extract machines from ${CMAKE_SOURCE_DIR}/gen/machines.xml (${machine_lines_result})")
+ENDIF()
+
+#handle_machine1(<name> <arch>...)
+#Registers one machine when every listed arch has have_<arch> set:
+#appends its generated source, its LV_MACHINE_<NAME> definition and its name
+#to machine_sources, machine_defs and available_machines.
+MACRO(handle_machine1 name)
+    #macros share the caller's scope, so reset the state left behind
+    #by a previous expansion before accumulating again
+    SET(machine_flags "")
+    SET(is_match TRUE)
+    STRING(TOUPPER LV_MACHINE_${name} machine_def)
+
+    #check if all the arches are supported
+    FOREACH(arch ${ARGN})
+        IF(NOT have_${arch})
+            SET(is_match FALSE)
+            BREAK()
+        ENDIF()
+        SET(machine_flags "${machine_flags} ${${arch}_flag}")
+    ENDFOREACH(arch)
+
+    IF(is_match)
+        #this is a match, append the source and set its flags
+        SET(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c)
+        SET_SOURCE_FILES_PROPERTIES(${machine_source} PROPERTIES COMPILE_FLAGS "${machine_flags}")
+        LIST(APPEND machine_sources ${machine_source})
+        LIST(APPEND machine_defs ${machine_def})
+        LIST(APPEND available_machines ${name})
+    ENDIF()
+ENDMACRO(handle_machine1)
+
+#handle_machine(<name> <arch>...)
+#A machine whose arch list contains the "32|64" entry is registered twice,
+#as <name>_32 with arch 32 and as <name>_64 with arch 64;
+#any other machine is registered once under its own name.
+MACRO(handle_machine name)
+    SET(arches ${ARGN})
+    LIST(FIND arches "32|64" index)
+    IF(index EQUAL -1)
+        handle_machine1(${name} ${arches})
+    ELSE()
+        LIST(REMOVE_ITEM arches "32|64")
+        handle_machine1(${name}_32 32 ${arches})
+        handle_machine1(${name}_64 64 ${arches})
+    ENDIF()
+ENDMACRO(handle_machine)
+
+#setup the available machines
+#each machine_line is "<name> <arch> [<arch>...]"
+FOREACH(machine_line ${machine_lines})
+    SEPARATE_ARGUMENTS(args UNIX_COMMAND "${machine_line}")
+    handle_machine(${args})
+ENDFOREACH(machine_line)
+
+MESSAGE(STATUS "Available machines: ${available_machines}")
+
+########################################################################
+# Create rules to run the volk generator
+######################################################################## +#list of the generated sources +SET(volk_gen_sources + ${CMAKE_BINARY_DIR}/include/volk/volk.h + ${CMAKE_BINARY_DIR}/lib/volk.c + ${CMAKE_BINARY_DIR}/lib/volk_init.h + ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h + ${CMAKE_BINARY_DIR}/lib/volk_cpu.c + ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h + ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c + ${CMAKE_BINARY_DIR}/lib/volk_environment_init.h + ${CMAKE_BINARY_DIR}/lib/volk_machines.h + ${CMAKE_BINARY_DIR}/lib/volk_machines.c + ${machine_sources} +) + +#dependencies are all python, xml, and header implementation files +FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) +FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) +FILE(GLOB h_files ${CMAKE_SOURCE_DIR}/include/volk/*.h) + +ADD_CUSTOM_COMMAND( + OUTPUT ${volk_gen_sources} + DEPENDS ${xml_files} ${py_files} ${h_files} + COMMAND ${PYTHON_EXECUTABLE} -B + ${CMAKE_SOURCE_DIR}/gen/volk_register.py + ${CMAKE_BINARY_DIR} +) + +######################################################################## +# Handle orc support +######################################################################## +FIND_PACKAGE(PkgConfig) +IF(PKG_CONFIG_FOUND) +PKG_CHECK_MODULES(ORC "orc-0.4") +ENDIF(PKG_CONFIG_FOUND) + +FIND_PROGRAM(ORCC_EXECUTABLE orcc) + +IF(ORC_FOUND AND ORCC_EXECUTABLE) + #setup orc library usage + INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS}) + LINK_DIRECTORIES(${ORC_LIBRARY_DIRS}) + ADD_DEFINITIONS(-DLV_HAVE_ORC) + + #setup orc functions + FILE(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc) + FOREACH(orc_file ${orc_files}) + + #extract the name for the generated c source from the orc file + GET_FILENAME_COMPONENT(orc_file_name_we ${orc_file} NAME_WE) + SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c) + + #create a rule to generate the source and add to the list of sources + ADD_CUSTOM_COMMAND( + COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file} + DEPENDS 
${orc_file} OUTPUT ${orcc_gen} + ) + LIST(APPEND volk_sources ${orcc_gen}) + + ENDFOREACH(orc_file) +ELSE() + MESSAGE(STATUS "Did not find liborc and orcc, disabling orc support...") +ENDIF() + +######################################################################## +# Setup the volk sources list and library +######################################################################## +IF(NOT WIN32) + ADD_DEFINITIONS(-fvisibility=hidden) +ENDIF() + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +LIST(APPEND volk_sources + ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c + ${volk_gen_sources} +) + +#set the machine definitions where applicable +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_CURRENT_BINARY_DIR}/volk.c + ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c +PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") + +IF(MSVC) + #add compatibility includes for stdint types + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) + #compile the sources as C++ due to the lack of complex.h under MSVC + SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX) +ENDIF(MSVC) + +#create the volk runtime library +ADD_LIBRARY(volk SHARED ${volk_sources}) +TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES}) +SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER}) +SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") + +INSTALL(TARGETS volk + LIBRARY DESTINATION lib${LIB_SUFFIX} # .so file + ARCHIVE DESTINATION lib${LIB_SUFFIX} # .lib file + RUNTIME DESTINATION bin # .dll file +) + +######################################################################## +# Build the QA test application +######################################################################## +FIND_PACKAGE(Boost COMPONENTS unit_test_framework) + +IF(Boost_FOUND) + +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES + COMPILE_DEFINITIONS 
"BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN" +) + +INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) +LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) + +ADD_EXECUTABLE(test_all + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) +ADD_TEST(qa_volk_test_all test_all) + +ENDIF() diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am deleted file mode 100644 index 473acd2a6..000000000 --- a/volk/lib/Makefile.am +++ /dev/null @@ -1,158 +0,0 @@ -# -# Copyright 2010,2011 Free Software Foundation, Inc. -# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# - -include $(top_srcdir)/Makefile.common - -#FIXME: forcing the top_builddir for distcheck seems like a bit -# of a hack. Figure out the right way to do this to find built -# volk_config.h and volk_tables.h - -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ - -I$(top_builddir)/include \ - $(LV_CXXFLAGS) $(WITH_INCLUDES) - - -# We build 1 library and 1 executable here. The library contains -# everything except the QA code. The C++ QA code is especially recommended -# when you have general purpose C or C++ code that may not get -# thoroughly exercised by building and running a GR block. The -# executable runs the QA code at "make check" time. 
-# -# -# -# N.B., If there's a SWIG generated shared library and associated -# python code, it will be contained in ../python, not here. (That -# code is conditionally built depending on the state of the -# --without-python configure option.) However, the .i should be here -# next to the .h that it's based on. - - -# list of programs run by "make check" and "make distcheck" -#TESTS = testqa -#orc stuff gets built in the ORC directory conditional to ORC being enabled. -#it gets linked in during the build of libvolk as an added library. -#there might be a better way to do this. - -lib_LTLIBRARIES = \ - libvolk.la \ - libvolk_runtime.la - -EXTRA_DIST = \ - volk_mktables.c \ - volk_rank_archs.h \ - volk_proccpu_sim.c \ - gcc_x86_cpuid.h - -# ---------------------------------------------------------------- -# The main library -# ---------------------------------------------------------------- - -libvolk_runtime_la_SOURCES = \ - $(platform_CODE) \ - volk_runtime.c \ - volk_init.c \ - volk_rank_archs.c - -libvolk_la_SOURCES = \ - $(platform_CODE) \ - volk.c \ - volk_environment_init.c - -volk_orc_LDFLAGS = \ - $(ORC_LDFLAGS) \ - -lorc-0.4 - -volk_orc_LIBADD = \ - ../orc/libvolk_orc.la - -if LV_HAVE_ORC -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_la_LIBADD = $(volk_orc_LIBADD) -else -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_la_LIBADD = -endif - - -# ---------------------------------------------------------------- -# The QA library. 
Note libvolk.la in LIBADD -# ---------------------------------------------------------------- -#libvolk_qa_la_SOURCES = \ -# qa_utils.cc - -#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost - -#libvolk_qa_la_LIBADD = \ -# libvolk.la \ -# libvolk_runtime.la - -# ---------------------------------------------------------------- -# headers that don't get installed -# ---------------------------------------------------------------- -noinst_HEADERS = \ - volk_init.h \ - qa_utils.h - -# ---------------------------------------------------------------- -# Our test program -# ---------------------------------------------------------------- -noinst_PROGRAMS = \ - testqa - -testqa_SOURCES = testqa.cc qa_utils.cc -testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) \ - $(BOOST_CPPFLAGS) -testqa_LDFLAGS = $(BOOST_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) -if LV_HAVE_ORC -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la \ - ../orc/libvolk_orc.la -else -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la -endif - -distclean-local: - rm -f volk.c - rm -f volk_cpu_generic.c - rm -f volk_cpu_powerpc.c - rm -f volk_cpu_x86.c - rm -f volk_init.c - rm -f volk_init.h - rm -f volk_mktables.c - rm -f volk_proccpu_sim.c - rm -f volk_runtime.c - rm -f volk_tables.h - rm -f volk_environment_init.c -#SUBDIRS = - -#ifdef BUILD_SSE -#SUBDIRS += sse -#elif BUILD_SPU -#SUBDIRS += spu -#else -#SUBDIRS += port -#endif - - diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index 154aa0f17..5d5eb7e18 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() { double total; const int vlen = 3200; const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned 
(16))); - short input4[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + __VOLK_ATTR_ALIGNED(16) short input4[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); - short output2[vlen] __attribute__ ((aligned (16))); - short output3[vlen] __attribute__ ((aligned (16))); - short output01[vlen] __attribute__ ((aligned (16))); - short output11[vlen] __attribute__ ((aligned (16))); - short output21[vlen] __attribute__ ((aligned (16))); - short output31[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + __VOLK_ATTR_ALIGNED(16) short output2[vlen]; + __VOLK_ATTR_ALIGNED(16) short output3[vlen]; + __VOLK_ATTR_ALIGNED(16) short output01[vlen]; + __VOLK_ATTR_ALIGNED(16) short output11[vlen]; + __VOLK_ATTR_ALIGNED(16) short output21[vlen]; + __VOLK_ATTR_ALIGNED(16) short output31[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index 62deffaeb..2e6e6a1a0 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short target3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short target3[vlen]; - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; 
+ __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; - short cntl0[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl1[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl2[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; - short cntl3[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 819b2256b..3cd4e906d 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ 
-23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))); - short cntl0[vlen] __attribute__ ((aligned (16))); - short cntl1[vlen] __attribute__ ((aligned (16))); - short cntl2[vlen] __attribute__ ((aligned (16))); - short cntl3[vlen] __attribute__ ((aligned (16))); - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; for(int i = 0; i < vlen; ++i) { src0[i] = i; diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 66f8c9afa..192a69e35 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1() { const int vlen = 34; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + 
__VOLK_ATTR_ALIGNED(16) short output1[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = (short) (rand() - (RAND_MAX/2)); diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index 592304f83..a2e7a85be 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float input0[vlen]; - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; + __VOLK_ATTR_ALIGNED(16) float output01[vlen]; for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index a3d0955bd..981bb19e6 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - std::complex<float> input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen]; - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; + __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; const float scalar = vlen; const float rbw = 1.7; diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index 618a82a02..c880260f2 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() { double total; const int 
ITERS = 10000000; - uint32_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t input0; - uint32_t output0 __attribute__ ((aligned (16))); - uint32_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t output0; + __VOLK_ATTR_ALIGNED(16) uint32_t output01; input0 = ((uint32_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index 85ef58795..6be4e50ea 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint64_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t input0; - uint64_t output0 __attribute__ ((aligned (16))); - uint64_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t output0; + __VOLK_ATTR_ALIGNED(16) uint64_t output01; input0 = ((uint64_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index b0f63d2b5..db606a472 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -3,16 +3,16 @@ #include <boost/foreach.hpp> #include <boost/assign/list_of.hpp> #include <boost/tokenizer.hpp> -//#include <boost/test/unit_test.hpp> #include <iostream> #include <vector> #include <list> #include <ctime> #include <cmath> +#include <limits> #include <boost/lexical_cast.hpp> -//#include <volk/volk_runtime.h> -#include <volk/volk_registry.h> #include <volk/volk.h> +#include <volk/volk_cpu.h> +#include <volk/volk_common.h> #include <boost/typeof/typeof.hpp> #include <boost/type_traits.hpp> @@ -62,50 +62,14 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } } -static std::vector<std::string> get_arch_list(const int archs[]) { +static std::vector<std::string> get_arch_list(struct volk_func_desc desc) { std::vector<std::string> archlist; - int num_archs = archs[0]; - - //there has got to be a 
way to query these arches - for(int i = 0; i < num_archs; i++) { - switch(archs[i+1]) { - case (1<<LV_GENERIC): - archlist.push_back("generic"); - break; - case (1<<LV_ORC): - archlist.push_back("orc"); - break; - case (1<<LV_SSE): - archlist.push_back("sse"); - break; - case (1<<LV_SSE2): - archlist.push_back("sse2"); - break; - case (1<<LV_SSE3): - archlist.push_back("sse3"); - break; - case (1<<LV_SSSE3): - archlist.push_back("ssse3"); - break; - case (1<<LV_SSE4_1): - archlist.push_back("sse4_1"); - break; - case (1<<LV_SSE4_2): - archlist.push_back("sse4_2"); - break; - case (1<<LV_SSE4_A): - archlist.push_back("sse4_a"); - break; - case (1<<LV_MMX): - archlist.push_back("mmx"); - break; - case (1<<LV_AVX): - archlist.push_back("avx"); - break; - default: - break; - } + + for(int i = 0; i < desc.n_archs; i++) { + //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc + archlist.push_back(std::string(desc.indices[i])); } + return archlist; } @@ -256,7 +220,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i<vlen; i++) { - if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) { + if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl; @@ -269,7 +233,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { class volk_qa_aligned_mem_pool{ public: - void *get_new(size_t size, size_t alignment = 16){ + void *get_new(size_t size, size_t alignment = 32){ _mems.push_back(std::vector<char>(size + alignment-1, 0)); size_t ptr = size_t(&_mems.back().front()); return (void *)((ptr + alignment-1) & ~(alignment-1)); @@ -277,11 +241,19 @@ public: private: std::list<std::vector<char> > _mems; }; -bool run_volk_tests(const int archs[], void (*manual_func)(), 
std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector<std::string> *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test - std::vector<std::string> arch_list = get_arch_list(archs); + std::vector<std::string> arch_list = get_arch_list(desc); if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; @@ -334,6 +306,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //now run the test clock_t start, end; + std::vector<double> profile_times; for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -368,8 +341,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; @@ -381,7 +358,9 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, bool fail = false; bool fail_global = false; + std::vector<bool> arch_results; for(int i=0; i<arch_list.size(); i++) { + fail = false; if(i != generic_offset) { for(int j=0; j<both_sigs.size(); j++) { if(both_sigs[j].is_float) { @@ -432,6 +411,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 
2:1)); } } + arch_results.push_back(!fail); + } + + double best_time = std::numeric_limits<double>::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 1b64bacaa..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,9 @@ #include <cstdlib> #include <string> +#include <vector> +#include <volk/volk.h> +#include <volk/volk_common.h> struct volk_type_t { bool is_float; @@ -18,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *); +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); diff --git a/volk/lib/testqa.cc 
b/volk/lib/testqa.cc index 779bc61eb..349fb0630 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -1,100 +1,93 @@ #include "qa_utils.h" #include <volk/volk.h> -#include <volk/volk_registry.h> #include <boost/test/unit_test.hpp> -BOOST_AUTO_TEST_CASE(volk_test_all) { - //in order... -// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); -// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 
10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); 
-// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100); +VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000); 
+//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50); +VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100); +//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100); +VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000); 
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000); +//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100); +VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000); +//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000); +VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100); +VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100); +VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000); +VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000); +VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000); +VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 204600, 2000); 
+//VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 204600, 1000); +//VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400); +VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400); +VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 204600, 20000); +VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000); -} diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c new file mode 100644 index 000000000..ebfe3bc40 --- /dev/null +++ b/volk/lib/volk_prefs.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <volk/volk_prefs.h> + +//#if defined(_WIN32) +//#include <Windows.h> +//#endif + +void get_config_path(char *path) { + const char *suffix = "/.volk/volk_config"; + strcpy(path, getenv("HOME")); + strcat(path, suffix); +} + +//passing by reference in C can suck my balls +int load_preferences(struct volk_arch_pref **prefs) { + FILE *config_file; + char path[512], line[512], function[128], arch[32]; + int n_arch_prefs = 0; + struct volk_arch_pref *t_pref; + + //get the config path + 
get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + n_arch_prefs++; + } + } + + //now allocate the memory required for volk_arch_prefs + (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref)); + t_pref = (*prefs); + + //reset the file pointer and write the prefs into volk_arch_prefs + rewind(config_file); + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + strncpy(t_pref->name, function, 128); + strncpy(t_pref->arch, arch, 32); + t_pref++; + } + } + fclose(config_file); + return n_arch_prefs; +} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index b1a93db26..e10433fd0 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,10 +1,40 @@ -#include<volk_rank_archs.h> -#include<stdio.h> +#include <volk_rank_archs.h> +#include <volk/volk_prefs.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) { - int i = 2; +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { + int i; + for(i=0; i<n_archs; i++) { + if(!strncmp(indices[i], arch_name, 20)) { + return i; + } + } + //something terrible should happen here + printf("Volk warning: no arch found, returning generic impl\n"); + return get_index(indices, n_archs, "generic"); //but we'll fake it for now +} + +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { + int i; unsigned int best_val = 0; - for(; i < arch_defs[0] + 1; ++i) { + static struct volk_arch_pref *volk_arch_prefs; + static int n_arch_prefs = 0; + static int prefs_loaded = 0; + if(!prefs_loaded) { + n_arch_prefs = 
load_preferences(&volk_arch_prefs); + prefs_loaded = 1; + } + + //now look for the function name in the prefs list + for(i=0; i < n_arch_prefs; i++) { + if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it + return get_index(indices, n_archs, volk_arch_prefs[i].arch); + } + } + + for(i=1; i < n_archs; ++i) { if((arch_defs[i]&(!arch)) == 0) { best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 26b9f7503..546240d2c 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,8 +5,8 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch); - +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } diff --git a/volk/msvc/inttypes.h b/volk/msvc/inttypes.h new file mode 100644 index 000000000..1c2baa82e --- /dev/null +++ b/volk/msvc/inttypes.h @@ -0,0 +1,301 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <stdint.h> + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for 
unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" 
+#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from 
Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/volk/msvc/stdint.h b/volk/msvc/stdint.h new file mode 100644 index 000000000..ab6d37e11 --- /dev/null +++ b/volk/msvc/stdint.h @@ -0,0 +1,251 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#ifndef INTMAX_C +#define INTMAX_C INT64_C +#endif +#ifndef UINTMAX_C +#define UINTMAX_C UINT64_C +#endif + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am index 6b5e4f8b6..960d09b6a 100644 --- a/volk/orc/Makefile.am +++ b/volk/orc/Makefile.am @@ -21,7 +21,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(ORC_CFLAGS) include $(top_srcdir)/Makefile.common -lib_LTLIBRARIES = libvolk_orc.la +noinst_LTLIBRARIES = libvolk_orc.la libvolk_orc_la_LDFLAGS = $(ORC_LDFLAGS) libvolk_orc_la_SOURCES = \ diff --git a/volk/orc/volk_32f_x2_dot_prod_32f_a16_orc_impl.orc b/volk/orc/volk_32f_x2_dot_prod_32f_a16_orc_impl.orc new file mode 100644 index 000000000..d92afbe01 --- /dev/null +++ b/volk/orc/volk_32f_x2_dot_prod_32f_a16_orc_impl.orc @@ -0,0 +1,6 @@ +.function volk_32f_x2_dot_prod_32f_a16_orc_impl +.source 4 src1 +.source 4 src2 +.dest 4 dst +.accumulator 4 accum +addf dst, src1, src2 diff --git a/volk/volk.pc.in b/volk/volk.pc.in index 85425ba64..58e976786 100644 --- a/volk/volk.pc.in +++ b/volk/volk.pc.in @@ -5,11 +5,10 @@ includedir=@includedir@ LV_CXXFLAGS=@LV_CXXFLAGS@ - Name: volk -Description: VOLK.. 
Vector Optimized Library of Kernels +Description: VOLK: Vector Optimized Library of Kernels Requires: Version: @VERSION@ -Libs: -lvolk -lvolk_runtime @LV_ORC_PKGCONFIG@ +Libs: -lvolk Cflags: -I${includedir} ${LV_CXXFLAGS} |