From 1633e9371a0bce876757f1c2c3e4054436b57950 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 15 Feb 2011 18:48:56 -0800 Subject: cpuid: No more compile-time CPU checks. Compiles everything that gcc allows. Configure- and compile-time checks modified to only compile architectures that the compiler will handle. This means that volk will compile every arch that your gcc will compile, no matter if your CPU can execute the instructions or not. This lets you cross-compile volk. volk_rank_archs will be deferred to runtime. This has not been done yet. --- volk/lib/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 3e5502369..4ffe97d7e 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -68,7 +68,8 @@ libvolk_runtime_la_SOURCES = \ $(platform_CODE) \ volk_runtime.c \ volk_init.c \ - volk_rank_archs.c + volk_rank_archs.c \ + volk_cpu.c libvolk_la_SOURCES = \ $(platform_CODE) \ -- cgit From d364316fa2e047890af09c7bdeed776d6391b3db Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 15 Feb 2011 21:28:43 -0800 Subject: Removed some mktables stuff since it's passe --- volk/lib/Makefile.am | 3 --- 1 file changed, 3 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 4ffe97d7e..af655a1c5 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -55,9 +55,7 @@ lib_LTLIBRARIES = \ libvolk_runtime.la EXTRA_DIST = \ - volk_mktables.c \ volk_rank_archs.h \ - volk_proccpu_sim.c \ gcc_x86_cpuid.h # ---------------------------------------------------------------- @@ -140,7 +138,6 @@ distclean-local: rm -f volk_cpu_x86.c rm -f volk_init.c rm -f volk_init.h - rm -f volk_mktables.c rm -f volk_proccpu_sim.c rm -f volk_runtime.c rm -f volk_tables.h -- cgit From fd03c0ee7cb9af71a5e0292569b626ba3dabd885 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 15 Feb 2011 21:59:19 -0800 Subject: Moved the fn indices gen from 
volk.c to volk_registry.h so the qa code has access to the static stuff --- volk/lib/qa_utils.cc | 50 ++++++++------------------------------------------ volk/lib/qa_utils.h | 4 ++-- 2 files changed, 10 insertions(+), 44 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index e85e2c1bc..150cef729 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -12,6 +12,7 @@ //#include #include #include +#include #include #include @@ -77,49 +78,14 @@ void make_buffer_for_signature(std::vector &buffs, std::vector get_arch_list(const int archs[]) { +static std::vector get_arch_list(const char **indices, const int archs[]) { std::vector archlist; int num_archs = archs[0]; - - //there has got to be a way to query these arches + for(int i = 0; i < num_archs; i++) { - switch(archs[i+1]) { - case (1< arch_list = get_arch_list(archs); + std::vector arch_list = get_arch_list(indices, archs); if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index e2539060a..503beb419 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -18,9 +18,9 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int); +bool run_volk_tests(const char **, const int[], void(*)(), std::string, float, float, int, int); -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_indices, func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned 
int, const char*); -- cgit From 88c389e1166cf5427cfabc012502337999c2b68f Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 15 Feb 2011 22:25:54 -0800 Subject: Revert "Removed some mktables stuff since it's passe" This reverts commit d364316fa2e047890af09c7bdeed776d6391b3db. --- volk/lib/Makefile.am | 3 +++ 1 file changed, 3 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index af655a1c5..4ffe97d7e 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -55,7 +55,9 @@ lib_LTLIBRARIES = \ libvolk_runtime.la EXTRA_DIST = \ + volk_mktables.c \ volk_rank_archs.h \ + volk_proccpu_sim.c \ gcc_x86_cpuid.h # ---------------------------------------------------------------- @@ -138,6 +140,7 @@ distclean-local: rm -f volk_cpu_x86.c rm -f volk_init.c rm -f volk_init.h + rm -f volk_mktables.c rm -f volk_proccpu_sim.c rm -f volk_runtime.c rm -f volk_tables.h -- cgit From 1bf5a3ea4bbb9fd6baf72eb876778d2652c97f2c Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 15 Feb 2011 22:28:38 -0800 Subject: reverted mktables deletion until i get my act together and make it go straight from python -> .h --- volk/lib/qa_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 150cef729..f8fcc7b28 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -83,10 +83,10 @@ static std::vector get_arch_list(const char **indices, const int ar int num_archs = archs[0]; for(int i = 0; i < num_archs; i++) { - //std::cout << "the archs this fn is avail on is " << archs[0] << std::endl; if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc archlist.push_back(std::string(indices[i])); } + return archlist; } -- cgit From 8ac430070308f83e8d88f6ae8cf802ccf5a9cf82 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 16 Feb 2011 15:20:10 -0800 Subject: take libvolk_runtime out of the testqa linkage --- 
volk/lib/Makefile.am | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 4ffe97d7e..2338ac57c 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -129,8 +129,7 @@ testqa_LDADD = \ ../orc/libvolk_orc.la else testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la + libvolk.la endif distclean-local: -- cgit From 0eeeb636a89c5086293bae31511316e4200ad2f9 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 16 Feb 2011 17:20:09 -0800 Subject: Volk_runtime now does self-initialization. You can call volk_xxx_a16() just like in volk.c. The first run of each function does the rank_archs call. Subsequent calls proceed with no overhead. volk_init is still being generated but not used at all. --- volk/lib/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index 2338ac57c..cf8c4f407 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -53,6 +53,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ lib_LTLIBRARIES = \ libvolk.la \ libvolk_runtime.la + libvolk_orc.la EXTRA_DIST = \ volk_mktables.c \ @@ -67,7 +68,6 @@ EXTRA_DIST = \ libvolk_runtime_la_SOURCES = \ $(platform_CODE) \ volk_runtime.c \ - volk_init.c \ volk_rank_archs.c \ volk_cpu.c -- cgit From cef9e33e01e946d82564f517de501cafcb5b1f32 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 9 Mar 2011 17:30:02 -0800 Subject: Volk: first steps to conditional compilation/multiple obj files. --- volk/lib/Makefile.am | 81 +++++++--------------------------------------------- 1 file changed, 11 insertions(+), 70 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am index cf8c4f407..298895e2a 100644 --- a/volk/lib/Makefile.am +++ b/volk/lib/Makefile.am @@ -1,23 +1,7 @@ -# -# Copyright 2010,2011 Free Software Foundation, Inc. 
-# -# This file is part of GNU Radio -# -# GNU Radio is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GNU Radio is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# +# This file is automatically generated by make_makefile_am.py. +# Do not edit this file. + include $(top_srcdir)/Makefile.common #FIXME: forcing the top_builddir for distcheck seems like a bit @@ -26,34 +10,11 @@ include $(top_srcdir)/Makefile.common AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ -I$(top_builddir)/include \ - $(LV_CXXFLAGS) $(WITH_INCLUDES) - - -# We build 1 library and 1 executable here. The library contains -# everything except the QA code. The C++ QA code is especially recommended -# when you have general purpose C or C++ code that may not get -# thoroughly exercised by building and running a GR block. The -# executable runs the QA code at "make check" time. -# -# -# -# N.B., If there's a SWIG generated shared library and associated -# python code, it will be contained in ../python, not here. (That -# code is conditionally built depending on the state of the -# --without-python configure option.) However, the .i should be here -# next to the .h that it's based on. - - -# list of programs run by "make check" and "make distcheck" -#TESTS = testqa -#orc stuff gets built in the ORC directory conditional to ORC being enabled. -#it gets linked in during the build of libvolk as an added library. 
-#there might be a better way to do this. + $(WITH_INCLUDES) lib_LTLIBRARIES = \ libvolk.la \ libvolk_runtime.la - libvolk_orc.la EXTRA_DIST = \ volk_mktables.c \ @@ -82,6 +43,7 @@ volk_orc_LDFLAGS = \ volk_orc_LIBADD = \ ../orc/libvolk_orc.la + if LV_HAVE_ORC libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) @@ -97,39 +59,29 @@ endif # ---------------------------------------------------------------- # The QA library. Note libvolk.la in LIBADD # ---------------------------------------------------------------- -#libvolk_qa_la_SOURCES = \ -# qa_utils.cc +#libvolk_qa_la_SOURCES = # qa_utils.cc #libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost -#libvolk_qa_la_LIBADD = \ -# libvolk.la \ -# libvolk_runtime.la +#libvolk_qa_la_LIBADD = # libvolk.la # libvolk_runtime.la # ---------------------------------------------------------------- # headers that don't get installed # ---------------------------------------------------------------- -noinst_HEADERS = \ - volk_init.h \ - qa_utils.h +noinst_HEADERS = volk_init.h qa_utils.h # ---------------------------------------------------------------- # Our test program # ---------------------------------------------------------------- -noinst_PROGRAMS = \ - testqa +noinst_PROGRAMS = testqa testqa_SOURCES = testqa.cc qa_utils.cc testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB) if LV_HAVE_ORC -testqa_LDADD = \ - libvolk.la \ - libvolk_runtime.la \ - ../orc/libvolk_orc.la +testqa_LDADD = libvolk.la libvolk_runtime.la ../orc/libvolk_orc.la else -testqa_LDADD = \ - libvolk.la +testqa_LDADD = libvolk.la endif distclean-local: @@ -144,14 +96,3 @@ distclean-local: rm -f volk_runtime.c rm -f volk_tables.h rm -f volk_environment_init.c -#SUBDIRS = - -#ifdef BUILD_SSE -#SUBDIRS += sse -#elif BUILD_SPU -#SUBDIRS += spu -#else -#SUBDIRS += port -#endif - - -- cgit From 5fffe801f95f2ef8bddf51aea8ed260eae0bf7b8 Mon Sep 17 00:00:00 
2001 From: Nick Foster Date: Wed, 13 Apr 2011 18:32:28 -0700 Subject: Volk: make_makefile_am.py changes to generate cflags, ldflags. no conditional linking yet. --- volk/lib/Makefile.am | 99 ---------------------------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 volk/lib/Makefile.am (limited to 'volk/lib') diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am deleted file mode 100644 index b032eefe6..000000000 --- a/volk/lib/Makefile.am +++ /dev/null @@ -1,99 +0,0 @@ - -# This file is automatically generated by make_makefile_am.py. -# Do not edit this file. - -include $(top_srcdir)/Makefile.common - -#FIXME: forcing the top_builddir for distcheck seems like a bit -# of a hack. Figure out the right way to do this to find built -# volk_config.h and volk_tables.h - -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \ - -I$(top_builddir)/include \ - $(WITH_INCLUDES) - -lib_LTLIBRARIES = \ - libvolk.la \ - libvolk_runtime.la - -EXTRA_DIST = \ - volk_mktables.c \ - volk_rank_archs.h \ - volk_proccpu_sim.c \ - gcc_x86_cpuid.h - -# ---------------------------------------------------------------- -# The main library -# ---------------------------------------------------------------- - -libvolk_runtime_la_SOURCES = \ - $(platform_CODE) \ - volk_runtime.c \ - volk_rank_archs.c \ - volk_cpu.c - -libvolk_la_SOURCES = \ - $(platform_CODE) \ - volk.c \ - volk_environment_init.c - -volk_orc_LDFLAGS = \ - $(ORC_LDFLAGS) \ - -lorc-0.4 - -volk_orc_LIBADD = \ - ../orc/libvolk_orc.la - - -if LV_HAVE_ORC -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS) -libvolk_la_LIBADD = $(volk_orc_LIBADD) -else -libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -libvolk_la_LIBADD = -endif - - -# ---------------------------------------------------------------- -# The QA library. 
Note libvolk.la in LIBADD -# ---------------------------------------------------------------- -#libvolk_qa_la_SOURCES = # qa_utils.cc - -#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost - -#libvolk_qa_la_LIBADD = # libvolk.la # libvolk_runtime.la - -# ---------------------------------------------------------------- -# headers that don't get installed -# ---------------------------------------------------------------- -noinst_HEADERS = volk_init.h qa_utils.h - -# ---------------------------------------------------------------- -# Our test program -# ---------------------------------------------------------------- -noinst_PROGRAMS = testqa - -testqa_SOURCES = testqa.cc qa_utils.cc -testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS) \ - $(BOOST_CPPFLAGS) -testqa_LDFLAGS = $(BOOST_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) -if LV_HAVE_ORC -testqa_LDADD = libvolk.la libvolk_runtime.la ../orc/libvolk_orc.la -else -testqa_LDADD = libvolk.la -endif - -distclean-local: - rm -f volk.c - rm -f volk_cpu_generic.c - rm -f volk_cpu_powerpc.c - rm -f volk_cpu_x86.c - rm -f volk_init.c - rm -f volk_init.h - rm -f volk_mktables.c - rm -f volk_proccpu_sim.c - rm -f volk_runtime.c - rm -f volk_tables.h - rm -f volk_environment_init.c -- cgit From 8b04d29221719239b52a300a338513f05746ed7f Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Sat, 16 Apr 2011 10:05:43 -0700 Subject: Volk: split n_archs out of arch_defs[0], began to add _manual support --- volk/lib/volk_rank_archs.c | 6 +++--- volk/lib/volk_rank_archs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index b1a93db26..25ad75cda 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,10 +1,10 @@ #include #include -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) { - int i = 2; +unsigned int volk_rank_archs(const int* arch_defs, unsigned int 
n_archs, unsigned int arch) { + int i = 1; unsigned int best_val = 0; - for(; i < arch_defs[0] + 1; ++i) { + for(; i < n_archs; ++i) { if((arch_defs[i]&(!arch)) == 0) { best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 26b9f7503..8fa0631ee 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,7 +5,7 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch); +unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); #ifdef __cplusplus -- cgit From 668da8bd8874ae71a819d55f046e39c964a4270b Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Sat, 16 Apr 2011 10:47:10 -0700 Subject: Volk: manual funcs implemented, QA code runs. Barfs due to missing Orc arch. --- volk/lib/qa_utils.cc | 14 +++++++------- volk/lib/qa_utils.h | 6 ++++-- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index acf72cfe1..2dee61823 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -63,13 +64,12 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) { } } -static std::vector get_arch_list(const int archs[]) { +static std::vector get_arch_list(struct volk_func_desc desc) { std::vector archlist; - int num_archs = archs[0]; - for(int i = 0; i < num_archs; i++) { - if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc - archlist.push_back(std::string(indices[i])); + for(int i = 0; i < desc.n_archs; i++) { + //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc + archlist.push_back(std::string(desc.indices[i])); } return archlist; @@ -243,11 +243,11 @@ public: private: std::list > _mems; }; -bool run_volk_tests(const int archs[], void (*manual_func)(), std::string 
name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test - std::vector arch_list = get_arch_list(indices, archs); + std::vector arch_list = get_arch_list(desc); if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index ed1ab373b..5103589cc 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,8 @@ #include #include +#include +#include struct volk_type_t { bool is_float; @@ -18,9 +20,9 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(const char **, const int[], void(*)(), std::string, float, float, int, int); +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int); -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_indices, func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); -- cgit From 52f77cf961279fc9167b101df6c7a6282a1fb0ad Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Sat, 16 Apr 2011 14:31:20 -0700 Subject: Volk: Orc support back in there, QA code now runs, functionally complete --- volk/lib/testqa.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 779bc61eb..f04c380ac 
100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); + //VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); @@ -53,7 +53,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); // VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 1000); VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); -- cgit From 513020d87912569691601cc2c49ca3331959fa63 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Sat, 16 Apr 2011 23:05:09 -0700 Subject: Volk: modified archs.xml to put Orc higher prio than old SSE/SSE2, since Orc gives better results than those platforms on avg Test cases changed to take ~1s each on my i7 laptop --- volk/lib/testqa.cc | 164 ++++++++++++++++++++++++++--------------------------- 1 file changed, 82 insertions(+), 82 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index f04c380ac..c746cc336 100644 --- a/volk/lib/testqa.cc +++ 
b/volk/lib/testqa.cc @@ -7,94 +7,94 @@ BOOST_AUTO_TEST_CASE(volk_test_all) { //in order... // VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); // VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000); + 
VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100); + VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000); + VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000); // VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); // VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000); + VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50); + VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000); + VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100); //VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000); + VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100); + 
VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000); // VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 1000); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100); + VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000); + VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000); // VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000); - 
VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000); + VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000); + VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000); + VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000); + VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000); + VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000); + VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100); + VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100); + VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000); + VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000); + VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000); + VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000); + VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 204600, 
10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000); + VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000); + VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 204600, 2000); // VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000); + VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 204600, 1000); // VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 
204600, 3000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000); + VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000); + VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); + VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400); + VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400); + VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 204600, 20000); + VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000); + VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000); } -- cgit From 208dcc510e6b6beedf2479a598a90d32c19e1274 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 17 Apr 2011 17:29:45 -0700 Subject: volk: updated lib and include .gitignores for in-tree build --- volk/lib/.gitignore | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index 6a5fde28f..c676a61df 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -8,16 +8,11 @@ /.lo /Makefile /Makefile.in -/volk.c -/volk_cpu_generic.c -/volk_cpu_powerpc.c -/volk_cpu_x86.c +/Makefile.am /volk_environment_init.c -/volk_init.c /volk_init.h -/volk_mktables -/volk_mktables.c -/volk_proccpu_sim.c -/volk_runtime.c -/test_all /testqa +/volk.cc +/volk_cpu.c +/volk_machine_*.cc +/volk_machines.cc -- cgit From 0b3e4f25eea1eef3e8a45fdb6d1bcc1ec57d1321 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 17 Apr 2011 23:03:18 -0700 Subject: volk: replace references to __attribute__((aligned... with cross platform macro This replaces all aligned(16) and aligned(128) (which were incorrect). Attribute macros are on the left because this makes gcc and ms compilers happy. 
--- volk/lib/qa_16s_add_quad_aligned16.cc | 26 +++++++++++----------- volk/lib/qa_16s_branch_4_state_8_aligned16.cc | 20 ++++++++--------- .../lib/qa_16s_permute_and_scalar_add_aligned16.cc | 18 +++++++-------- volk/lib/qa_16s_quad_max_star_aligned16.cc | 12 +++++----- volk/lib/qa_32f_fm_detect_aligned16.cc | 6 ++--- ...qa_32fc_power_spectral_density_32f_aligned16.cc | 6 ++--- volk/lib/qa_32u_popcnt_aligned16.cc | 6 ++--- volk/lib/qa_64u_popcnt_aligned16.cc | 6 ++--- 8 files changed, 50 insertions(+), 50 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc index 154aa0f17..5d5eb7e18 100644 --- a/volk/lib/qa_16s_add_quad_aligned16.cc +++ b/volk/lib/qa_16s_add_quad_aligned16.cc @@ -22,20 +22,20 @@ void qa_16s_add_quad_aligned16::t1() { double total; const int vlen = 3200; const int ITERS = 100000; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); - short input4[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + __VOLK_ATTR_ALIGNED(16) short input4[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); - short output2[vlen] __attribute__ ((aligned (16))); - short output3[vlen] __attribute__ ((aligned (16))); - short output01[vlen] __attribute__ ((aligned (16))); - short output11[vlen] __attribute__ ((aligned (16))); - short output21[vlen] __attribute__ ((aligned (16))); - short output31[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + __VOLK_ATTR_ALIGNED(16) short output2[vlen]; + __VOLK_ATTR_ALIGNED(16) short 
output3[vlen]; + __VOLK_ATTR_ALIGNED(16) short output01[vlen]; + __VOLK_ATTR_ALIGNED(16) short output11[vlen]; + __VOLK_ATTR_ALIGNED(16) short output21[vlen]; + __VOLK_ATTR_ALIGNED(16) short output31[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc index 62deffaeb..2e6e6a1a0 100644 --- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc +++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -29,22 +29,22 @@ void qa_16s_branch_4_state_8_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short target3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short target3[vlen]; - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; - short cntl0[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl1[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - short cntl2[vlen] 
__attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; - short cntl3[vlen] __attribute__ ((aligned (16))) = { + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc index 819b2256b..3cd4e906d 100644 --- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc +++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -23,15 +23,15 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() { clock_t start, end; double total; - short target[vlen] __attribute__ ((aligned (16))); - short target2[vlen] __attribute__ ((aligned (16))); - short src0[vlen] __attribute__ ((aligned (16))); - short permute_indexes[vlen] __attribute__ ((aligned (16))); - short cntl0[vlen] __attribute__ ((aligned (16))); - short cntl1[vlen] __attribute__ ((aligned (16))); - short cntl2[vlen] __attribute__ ((aligned (16))); - short cntl3[vlen] __attribute__ ((aligned (16))); - short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4}; + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; + 
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; for(int i = 0; i < vlen; ++i) { src0[i] = i; diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc index 66f8c9afa..192a69e35 100644 --- a/volk/lib/qa_16s_quad_max_star_aligned16.cc +++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc @@ -17,13 +17,13 @@ void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1() { const int vlen = 34; - short input0[vlen] __attribute__ ((aligned (16))); - short input1[vlen] __attribute__ ((aligned (16))); - short input2[vlen] __attribute__ ((aligned (16))); - short input3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; - short output0[vlen] __attribute__ ((aligned (16))); - short output1[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; for(int i = 0; i < vlen; ++i) { short plus0 = (short) (rand() - (RAND_MAX/2)); diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc index 592304f83..a2e7a85be 100644 --- a/volk/lib/qa_32f_fm_detect_aligned16.cc +++ b/volk/lib/qa_32f_fm_detect_aligned16.cc @@ -21,10 +21,10 @@ void qa_32f_fm_detect_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - float input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float input0[vlen]; - float output0[vlen] __attribute__ ((aligned (16))); - float output01[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; + __VOLK_ATTR_ALIGNED(16) float output01[vlen]; for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); diff --git 
a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc index a3d0955bd..981bb19e6 100644 --- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc +++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -21,10 +21,10 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() { double total; const int vlen = 3201; const int ITERS = 10000; - std::complex input0[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) std::complex input0[vlen]; - float output_generic[vlen] __attribute__ ((aligned (16))); - float output_sse3[vlen] __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; + __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; const float scalar = vlen; const float rbw = 1.7; diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc index 618a82a02..c880260f2 100644 --- a/volk/lib/qa_32u_popcnt_aligned16.cc +++ b/volk/lib/qa_32u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_32u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint32_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t input0; - uint32_t output0 __attribute__ ((aligned (16))); - uint32_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint32_t output0; + __VOLK_ATTR_ALIGNED(16) uint32_t output01; input0 = ((uint32_t) (rand() - (RAND_MAX/2))); output0 = 0; diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc index 85ef58795..6be4e50ea 100644 --- a/volk/lib/qa_64u_popcnt_aligned16.cc +++ b/volk/lib/qa_64u_popcnt_aligned16.cc @@ -25,10 +25,10 @@ void qa_64u_popcnt_aligned16::t1() { double total; const int ITERS = 10000000; - uint64_t input0 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t input0; - uint64_t output0 __attribute__ ((aligned (16))); - uint64_t output01 __attribute__ ((aligned (16))); + __VOLK_ATTR_ALIGNED(16) uint64_t output0; + 
__VOLK_ATTR_ALIGNED(16) uint64_t output01; input0 = ((uint64_t) (rand() - (RAND_MAX/2))); output0 = 0; -- cgit From a5e2d9e5baf869ae961fbb5820447290d6d9c7c8 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Tue, 26 Apr 2011 21:55:48 -0700 Subject: volk: reorganization of generation sources and generated files All generation sources have been moved to the gen/ subdirectory. Bootstrap and volk_register.py generate the files into to gen/ subdirectory in an effort to cleanly separate the static/generated parts of the build tree. Define top_gendir in Makefile.common, all generated sources listed in Makefile.ams are prefixed with $(top_gendir) to differentiate them from static in-tree sources. --- volk/lib/.gitignore | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore index c676a61df..28ec6ddaa 100644 --- a/volk/lib/.gitignore +++ b/volk/lib/.gitignore @@ -1,18 +1,4 @@ -/*.cache -/*.la -/*.lo -/*.pc -/.deps -/.la -/.libs -/.lo /Makefile /Makefile.in /Makefile.am -/volk_environment_init.c -/volk_init.h /testqa -/volk.cc -/volk_cpu.c -/volk_machine_*.cc -/volk_machines.cc -- cgit From 12413747c90754482582e16c95b551e1b36c6074 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Fri, 6 May 2011 11:25:00 -0700 Subject: volk: removed volk_registry.h, it was superseded by the machines --- volk/lib/qa_utils.cc | 3 --- volk/lib/testqa.cc | 1 - 2 files changed, 4 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 2dee61823..b195ab365 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -3,15 +3,12 @@ #include #include #include -//#include #include #include #include #include #include #include -//#include -#include #include #include #include diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index c746cc336..47d3c1328 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -1,6 +1,5 @@ #include "qa_utils.h" #include -#include #include 
BOOST_AUTO_TEST_CASE(volk_test_all) { -- cgit From c40ef84defaeed0c9ec70e45a7e4019fa6d6e1b2 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Fri, 6 May 2011 14:27:48 -0700 Subject: volk: various backports from MSVC building 1) Added support for __cpuid intrinsic under MSVC 2) Fixed disambiguation for std::abs overload in qa code 3) Fixed bit128 union, the ifdefs were completely wrong --- volk/lib/qa_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index b195ab365..fa091ad0d 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -219,7 +219,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i<vlen; i++) { - if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) { + if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl; -- cgit From 52b4aee1bde356f05bb017536968a8ec77e3878b Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Mon, 9 May 2011 18:44:08 -0700 Subject: Volk: Each QA test runs in separate Boost test case so it prints statistics for you. 
--- volk/lib/qa_utils.h | 2 +- volk/lib/testqa.cc | 182 +++++++++++++++++++++++++--------------------------- 2 files changed, 89 insertions(+), 95 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 5103589cc..304a00533 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -22,7 +22,7 @@ void random_floats(float *buf, unsigned n); bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int); -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0) +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); } typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc index 47d3c1328..349fb0630 100644 --- a/volk/lib/testqa.cc +++ b/volk/lib/testqa.cc @@ -2,98 +2,92 @@ #include #include -BOOST_AUTO_TEST_CASE(volk_test_all) { - //in order... 
-// VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000); - VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100); - VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000); - VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000); -// VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); -// VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50); - VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000); - 
VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100); - //VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000); - VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100); - VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000); -// VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100); - VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000); - 
VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000); - VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000); -// VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); - VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000); - VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000); - VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000); - VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000); - VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000); - VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000); - VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100); - VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100); - VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000); - VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000); - VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000); - VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000); - VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000); - VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000); - VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 204600, 2000); -// VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000); - VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 204600, 1000); -// VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000); - 
VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000); - VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000); - VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); - VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400); - VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400); - VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 204600, 20000); - VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000); - VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000); +//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000); +VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100); +VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000); +//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000); +//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 
10000); +VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50); +VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100); +//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100); +VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000); 
+//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100); +VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000); +//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000); +VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000); +VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000); +VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100); +VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100); +VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000); +VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000); +VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000); +VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000); +VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000); +VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 204600, 2000); +//VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000); 
+VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000); +VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 204600, 1000); +//VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000); +VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000); +VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400); +VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400); +VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 204600, 20000); +VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000); +VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000); -} -- cgit From 21d6870a6ef5284a5941dce1484bcfff6684ffea Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 9 May 2011 20:31:56 -0700 Subject: volk: cmake support for volk (gcc + msvc) --- volk/lib/CMakeLists.txt | 193 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 volk/lib/CMakeLists.txt (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt new file mode 100644 index 000000000..2019f201a --- /dev/null +++ b/volk/lib/CMakeLists.txt @@ -0,0 +1,193 @@ +# +# Copyright 2011 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +######################################################################## +# Parse the arches xml file: +# Test each arch to see if the compiler supports the flag. +# If the test passes append the arch to the available list. +######################################################################## +#extract the arch lines from the xml file using crazy python +EXECUTE_PROCESS( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('flag')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/archs.xml').getElementsByTagName('arch')))" + OUTPUT_VARIABLE arch_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +#This macro sets the ${arch}_flag variable, +#and handles special cases for MSVC arch flags. 
+MACRO(set_arch_flag name flag) + IF(MSVC AND ${name} STREQUAL "mmx") + SET(${name}_flag "/arch:SSE") #no /arch:MMX + ELSEIF(MSVC AND ${name} STREQUAL "sse") + SET(${name}_flag "/arch:SSE") + ELSEIF(MSVC AND ${name} STREQUAL "sse2") + SET(${name}_flag "/arch:SSE2") + ELSE() + SET(${name}_flag -${flag}) + ENDIF() +ENDMACRO(set_arch_flag) + +MACRO(handle_arch name flag) + + #handle special case for none flag + IF(${flag} STREQUAL "none") + SET(have_${name} TRUE) + + #otherwise test the flag against the compiler + ELSE() + INCLUDE(CheckCXXCompilerFlag) + set_arch_flag(${name} ${flag}) + CHECK_CXX_COMPILER_FLAG(${${name}_flag} have_${name}) + ENDIF() + + IF(have_${name}) + LIST(APPEND available_arches ${name}) + ENDIF() +ENDMACRO(handle_arch) + +#create a list of available arches +FOREACH(arch_line ${arch_lines}) + SEPARATE_ARGUMENTS(args UNIX_COMMAND "${arch_line}") + handle_arch(${args}) +ENDFOREACH(arch_line) + +MESSAGE(STATUS "Available arches: ${available_arches}") + +######################################################################## +# Parse the machines xml file: +# Test each machine to see if its arch dependencies are supported. +# Build a list of supported machines and the machine definitions. 
+######################################################################## +#extract the machine lines from the xml file using crazy python +EXECUTE_PROCESS( + COMMAND ${PYTHON_EXECUTABLE} -c + "from xml.dom import minidom; print ';'.join(map(lambda a: '%s %s'%(a.attributes['name'].value,a.getElementsByTagName('archs')[0].firstChild.data),minidom.parse('${CMAKE_SOURCE_DIR}/gen/machines.xml').getElementsByTagName('machine')))" + OUTPUT_VARIABLE machine_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +MACRO(handle_machine1 name) + UNSET(machine_flags) + STRING(TOUPPER LV_MACHINE_${name} machine_def) + + #check if all the arches are supported + FOREACH(arch ${ARGN}) + SET(is_match ${have_${arch}}) + IF(NOT is_match) + SET(is_match FALSE) + BREAK() + ENDIF(NOT is_match) + SET(machine_flags "${machine_flags} ${${arch}_flag}") + ENDFOREACH(arch) + + IF(is_match) + #this is a match, append the source and set its flags + SET(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${name}.c) + SET_SOURCE_FILES_PROPERTIES(${machine_source} PROPERTIES COMPILE_FLAGS ${machine_flags}) + LIST(APPEND machine_sources ${machine_source}) + LIST(APPEND machine_defs ${machine_def}) + LIST(APPEND available_machines ${name}) + ENDIF() +ENDMACRO(handle_machine1) + +MACRO(handle_machine name) + SET(arches ${ARGN}) + LIST(FIND arches "32|64" index) + IF(${index} EQUAL -1) + handle_machine1(${name} ${arches}) + ELSE() + LIST(REMOVE_ITEM arches "32|64") + handle_machine1(${name}_32 32 ${arches}) + handle_machine1(${name}_64 64 ${arches}) + ENDIF() +ENDMACRO(handle_machine) + +#setup the available machines +FOREACH(machine_line ${machine_lines}) + SEPARATE_ARGUMENTS(args UNIX_COMMAND "${machine_line}") + handle_machine(${args}) +ENDFOREACH(machine_line) + +MESSAGE(STATUS "Available machines: ${available_machines}") + +######################################################################## +# Setup volk and volk-runtime libraries 
+######################################################################## +IF(MSVC) + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) +ENDIF(MSVC) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +) + +SET(volk_sources + ${CMAKE_CURRENT_BINARY_DIR}/volk.c + ${CMAKE_CURRENT_BINARY_DIR}/volk_cpu.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c + ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c +) + +#set the machine definitions but only on the non-machine sources +SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") + +#append the generated machine sources to volk sources and set generated +LIST(APPEND volk_sources ${machine_sources}) +SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES GENERATED TRUE) + +#compile the sources as C++ due to the lack of complex.h for non GCC +IF(MSVC) +SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX) +ENDIF(MSVC) + +#create the volk runtime library +ADD_LIBRARY(volk SHARED ${volk_sources}) +ADD_DEPENDENCIES(volk volk_register) +SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER}) +SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") + +INSTALL(TARGETS volk + LIBRARY DESTINATION lib${LIB_SUFFIX} # .so file + ARCHIVE DESTINATION lib${LIB_SUFFIX} # .lib file + RUNTIME DESTINATION bin # .dll file +) + +######################################################################## +# Build the QA test application +######################################################################## +FIND_PACKAGE(Boost COMPONENTS unit_test_framework) + +IF(Boost_FOUND) + +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES + COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN" +) + +INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) +LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) + +ADD_EXECUTABLE(test_all + ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc + 
${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) +ADD_TEST(qa_volk_test_all test_all) + +ENDIF() -- cgit From 51f9487bc6b8162ba10b8841475abcc26b220459 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Tue, 10 May 2011 14:23:12 -0700 Subject: volk: added orc support to the cmake build --- volk/lib/CMakeLists.txt | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index 2019f201a..dfb1d4219 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -123,6 +123,42 @@ ENDFOREACH(machine_line) MESSAGE(STATUS "Available machines: ${available_machines}") +######################################################################## +# Handle orc support +######################################################################## +FIND_PACKAGE(PkgConfig) +IF(PKG_CONFIG_FOUND) +PKG_CHECK_MODULES(ORC "orc-0.4") +ENDIF(PKG_CONFIG_FOUND) + +FIND_PROGRAM(ORCC_EXECUTABLE orcc) + +IF(ORC_FOUND AND ORCC_EXECUTABLE) + #setup orc library usage + INCLUDE_DIRECTORIES(${ORC_INCLUDE_DIRS}) + LINK_DIRECTORIES(${ORC_LIBRARY_DIRS}) + ADD_DEFINITIONS(-DLV_HAVE_ORC) + + #setup orc functions + FILE(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc) + FOREACH(orc_file ${orc_files}) + + #extract the name for the generated c source from the orc file + GET_FILENAME_COMPONENT(orc_file_name_we ${orc_file} NAME_WE) + SET(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c) + + #create a rule to generate the source and add to the list of sources + ADD_CUSTOM_COMMAND( + COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file} + DEPENDS ${orc_file} OUTPUT ${orcc_gen} + ) + LIST(APPEND machine_sources ${orcc_gen}) + + ENDFOREACH(orc_file) +ELSE() + MESSAGE(STATUS "Did not find liborc and orcc, disabling orc support...") +ENDIF() + ######################################################################## # Setup volk and 
volk-runtime libraries ######################################################################## @@ -159,6 +195,7 @@ ENDIF(MSVC) #create the volk runtime library ADD_LIBRARY(volk SHARED ${volk_sources}) ADD_DEPENDENCIES(volk volk_register) +TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES}) SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER}) SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") -- cgit From a56dad2b5caf5167f4c93a15b86c304c0077a0a6 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Tue, 10 May 2011 18:24:35 -0700 Subject: volk: move generation rules into lib + cleanup --- volk/lib/CMakeLists.txt | 62 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index dfb1d4219..b0d60781f 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -123,6 +123,36 @@ ENDFOREACH(machine_line) MESSAGE(STATUS "Available machines: ${available_machines}") +######################################################################## +# Create rules to run the volk generator +######################################################################## +#list of the generated sources +SET(volk_gen_sources + ${CMAKE_BINARY_DIR}/include/volk/volk.h + ${CMAKE_BINARY_DIR}/lib/volk.c + ${CMAKE_BINARY_DIR}/lib/volk_init.h + ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h + ${CMAKE_BINARY_DIR}/lib/volk_cpu.c + ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h + ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c + ${CMAKE_BINARY_DIR}/include/volk/volk_environment_init.h + ${CMAKE_BINARY_DIR}/include/volk/volk_machines.h + ${CMAKE_BINARY_DIR}/lib/volk_machines.c + ${machine_sources} +) + +#dependencies are all python and xml files +FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) +FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) + +ADD_CUSTOM_COMMAND( + OUTPUT ${volk_gen_sources} + DEPENDS ${xml_files} ${py_files} + COMMAND 
${PYTHON_EXECUTABLE} -B + ${CMAKE_SOURCE_DIR}/gen/volk_register.py + ${CMAKE_BINARY_DIR} +) + ######################################################################## # Handle orc support ######################################################################## @@ -152,7 +182,7 @@ IF(ORC_FOUND AND ORCC_EXECUTABLE) COMMAND ${ORCC_EXECUTABLE} --implementation -o ${orcc_gen} ${orc_file} DEPENDS ${orc_file} OUTPUT ${orcc_gen} ) - LIST(APPEND machine_sources ${orcc_gen}) + LIST(APPEND volk_sources ${orcc_gen}) ENDFOREACH(orc_file) ELSE() @@ -160,12 +190,8 @@ ELSE() ENDIF() ######################################################################## -# Setup volk and volk-runtime libraries +# Setup the volk sources list and library ######################################################################## -IF(MSVC) - INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) -ENDIF(MSVC) - INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}/include @@ -173,28 +199,26 @@ INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_BINARY_DIR} ) -SET(volk_sources - ${CMAKE_CURRENT_BINARY_DIR}/volk.c - ${CMAKE_CURRENT_BINARY_DIR}/volk_cpu.c +LIST(APPEND volk_sources ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c - ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c + ${volk_gen_sources} ) -#set the machine definitions but only on the non-machine sources -SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") - -#append the generated machine sources to volk sources and set generated -LIST(APPEND volk_sources ${machine_sources}) -SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES GENERATED TRUE) +#set the machine definitions where applicable +SET_SOURCE_FILES_PROPERTIES( + ${CMAKE_CURRENT_BINARY_DIR}/volk.c + ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c +PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") -#compile the sources as C++ due to the lack of complex.h for non GCC IF(MSVC) -SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX) + #add compatibility 
includes for stdint types + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/msvc) + #compile the sources as C++ due to the lack of complex.h under MSVC + SET_SOURCE_FILES_PROPERTIES(${volk_sources} PROPERTIES LANGUAGE CXX) ENDIF(MSVC) #create the volk runtime library ADD_LIBRARY(volk SHARED ${volk_sources}) -ADD_DEPENDENCIES(volk volk_register) TARGET_LINK_LIBRARIES(volk ${ORC_LIBRARIES}) SET_TARGET_PROPERTIES(volk PROPERTIES SOVERSION ${LIBVER}) SET_TARGET_PROPERTIES(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS") -- cgit From e3997ada93a25569a05bbfd615d73d00cee6eca5 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Tue, 10 May 2011 21:52:23 -0700 Subject: Volk: initial profiling support. Profiling works, reading doesn't yet. Need to add name field to volk arch_defs --- volk/lib/CMakeLists.txt | 6 +++ volk/lib/qa_utils.cc | 35 ++++++++++++- volk/lib/qa_utils.h | 7 +-- volk/lib/volk_profile.cc | 123 +++++++++++++++++++++++++++++++++++++++++++++ volk/lib/volk_rank_archs.c | 59 +++++++++++++++++++++- volk/lib/volk_rank_archs.h | 7 ++- 6 files changed, 229 insertions(+), 8 deletions(-) create mode 100644 volk/lib/volk_profile.cc (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index dfb1d4219..6198dc0e7 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -227,4 +227,10 @@ ADD_EXECUTABLE(test_all TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) ADD_TEST(qa_volk_test_all test_all) +ADD_EXECUTABLE(volk_profile + ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc + ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc +) +TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) + ENDIF() diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index fa091ad0d..3eb1da1f1 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -240,7 +241,15 @@ public: private: std::list > _mems; }; -bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), 
std::string name, float tol, float scalar, int vlen, int iter) { +bool run_volk_tests(struct volk_func_desc desc, + void (*manual_func)(), + std::string name, + float tol, + float scalar, + int vlen, + int iter, + std::vector *best_arch_vector = 0 + ) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test @@ -297,6 +306,7 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri //now run the test clock_t start, end; + std::vector profile_times; for(int i = 0; i < arch_list.size(); i++) { start = clock(); @@ -331,8 +341,12 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri } end = clock(); - std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; + double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl; + + profile_times.push_back(arch_time); } + //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... 
int generic_offset=0; @@ -344,7 +358,9 @@ bool run_volk_tests(struct volk_func_desc desc, void (*manual_func)(), std::stri bool fail = false; bool fail_global = false; + std::vector arch_results; for(int i=0; i::max(); + std::string best_arch = "generic"; + for(int i=0; i < arch_list.size(); i++) { + if((profile_times[i] < best_time) && arch_results[i]) { + best_time = profile_times[i]; + best_arch = arch_list[i]; + } + } + + std::cout << "Best arch: " << best_arch << std::endl; + if(best_arch_vector) { + best_arch_vector->push_back(name + std::string(" ") + best_arch); } return fail_global; diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 304a00533..a1bc1f20c 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -20,10 +21,10 @@ volk_type_t volk_type_from_string(std::string); float uniform(void); void random_floats(float *buf, unsigned n); -bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int); - -#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0); } +bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector *); +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results) typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, 
unsigned int, const char*); diff --git a/volk/lib/volk_profile.cc b/volk/lib/volk_profile.cc new file mode 100644 index 000000000..c4a04abf2 --- /dev/null +++ b/volk/lib/volk_profile.cc @@ -0,0 +1,123 @@ +#include "qa_utils.h" +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + + std::vector results; + char path[512]; + get_config_path(path); + std::string config_path(path); + +/* + //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); + VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); + //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); + //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, 
&results); + VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); + VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); + //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); + VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); + 
VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); + //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); + VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); + //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); + VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); + VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); + VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); + VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); + VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); + VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 
204600, 10000, &results); + VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); + //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); + VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); + //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); + VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); + VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); + VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); + VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); + VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); +*/ + VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); + VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); + + std::ofstream config; + std::cout << "filename: " << config_path << std::endl; + config.open(config_path.c_str()); + + config << "\ +#this file is generated by volk_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); + + load_preferences(); +} diff --git 
a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 25ad75cda..f505abeb6 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,5 +1,60 @@ -#include -#include +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#endif + +void get_config_path(char *path) { + const char *suffix = "/.gnuradio/volk_config"; + memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); + strcat(path, suffix); +} + +/* + * ok so volk stuff has to be loaded piecemeal, and to avoid reading + * the whole config file in at startup we should probably create a static + * prefs struct that can be read in by rank_archs with minimal modification. + * this makes rank_archs slower and load_preferences more complex, but + * we don't have to export load_preferences and we don't have to include volk.h. + * means we need to pass the name into rank_archs, though + * problem is that names don't appear anywhere in the volk function descriptor. + * so we have to modify things to include the name in the descriptor. 
+ * + * also means you don't have to also spec the fn name in qa_utils.h/c, you can + * pass it in along with the func_desc + * + */ + +void load_preferences(void) { + static int prefs_loaded = 0; + FILE *config_file; + char path[512], line[512], function[256], arch[64]; + + if(prefs_loaded) return; + + int n_arch_preferences = 0; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + printf("func: %s, arch: %s\n", function, arch); + //we have a function and we have an arch, let's set it + n_arch_preferences++; + } + } + + fclose(config_file); + + printf("Found %d prefs\n", n_arch_preferences); + prefs_loaded = 1; +} unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) { int i = 1; diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 8fa0631ee..37a0fbc46 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -6,7 +6,12 @@ extern "C" { #endif unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); - + +//////////////////////////////////////////////////////////////////////// +//get path to volk_config profiling info +//////////////////////////////////////////////////////////////////////// +void get_config_path(char *); +void load_preferences(void); //FIXME DEBUG shouldn't be exported #ifdef __cplusplus } -- cgit From b50dbc4498842fecd7f0c6adc22f25726f8d27d3 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Wed, 11 May 2011 21:45:03 -0700 Subject: Volk: Profiler is in apps/ now. Added name to function info. Going to C++-ify the whole thing. 
--- volk/lib/CMakeLists.txt | 6 --- volk/lib/volk_profile.cc | 123 --------------------------------------------- volk/lib/volk_rank_archs.c | 15 +++++- volk/lib/volk_rank_archs.h | 8 +-- 4 files changed, 15 insertions(+), 137 deletions(-) delete mode 100644 volk/lib/volk_profile.cc (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index 6198dc0e7..dfb1d4219 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -227,10 +227,4 @@ ADD_EXECUTABLE(test_all TARGET_LINK_LIBRARIES(test_all volk ${Boost_LIBRARIES}) ADD_TEST(qa_volk_test_all test_all) -ADD_EXECUTABLE(volk_profile - ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc - ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc -) -TARGET_LINK_LIBRARIES(volk_profile volk ${Boost_LIBRARIES}) - ENDIF() diff --git a/volk/lib/volk_profile.cc b/volk/lib/volk_profile.cc deleted file mode 100644 index c4a04abf2..000000000 --- a/volk/lib/volk_profile.cc +++ /dev/null @@ -1,123 +0,0 @@ -#include "qa_utils.h" -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char *argv[]) { - - std::vector results; - char path[512]; - get_config_path(path); - std::string config_path(path); - -/* - //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000, &results); - //VOLK_PROFILE(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000, &results); - VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 204600, 1000, &results); - VOLK_PROFILE(volk_16ic_deinterleave_real_16i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16ic_magnitude_16i_a16, 1, 0, 204600, 100, &results); - VOLK_PROFILE(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 204600, 1000, &results); - 
VOLK_PROFILE(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_convert_8i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_convert_8i_u, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_max_star_16i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_16i_max_star_horizontal_16i_a16, 0, 0, 204600, 10000, &results); - //VOLK_PROFILE(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000, &results); - //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_16u_byteswap_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_accumulator_s32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_x2_add_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 204600, 50, &results); - VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 204600, 100, &results); - //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_index_max_16u_a16, 0, 0, 204600, 10000, &results); - 
VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 204600, 100, &results); - VOLK_PROFILE(volk_32fc_magnitude_32f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_16i_a16, 1, 32768, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_convert_64f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_8i_a16, 1, 128, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000, &results); - //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000, &results); - VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 20460, 100, &results); - VOLK_PROFILE(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_x2_divide_32f_a16, 1e-4, 0, 204600, 2000, &results); - VOLK_PROFILE(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000, &results); - //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000, &results); - VOLK_PROFILE(volk_32f_index_max_16u_a16, 0, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 204600, 3000, &results); - VOLK_PROFILE(volk_32f_x2_interleave_32fc_a16, 0, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x2_max_32f_a16, 1e-4, 0, 204600, 2000, &results); - VOLK_PROFILE(volk_32f_x2_min_32f_a16, 1e-4, 0, 204600, 2000, &results); - VOLK_PROFILE(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 
204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_normalize_a16, 1e-4, 100, 204600, 10000, &results); - VOLK_PROFILE(volk_32f_s32f_power_32f_a16, 1e-4, 4, 204600, 100, &results); - VOLK_PROFILE(volk_32f_sqrt_32f_a16, 1e-4, 0, 204600, 100, &results); - VOLK_PROFILE(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 204600, 3000, &results); - VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 204600, 3000, &results); - VOLK_PROFILE(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 204600, 5000, &results); - VOLK_PROFILE(volk_32i_x2_and_32i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 204600, 10000, &results); - VOLK_PROFILE(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000, &results); - VOLK_PROFILE(volk_32i_x2_or_32i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_32u_byteswap_a16, 0, 0, 204600, 2000, &results); - //VOLK_PROFILE(volk_32u_popcnt_a16, 0, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_64f_convert_32f_a16, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_64f_x2_max_64f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_64f_x2_min_64f_a16, 1e-4, 0, 204600, 1000, &results); - VOLK_PROFILE(volk_64u_byteswap_a16, 0, 0, 204600, 1000, &results); - //VOLK_PROFILE(volk_64u_popcnt_a16, 0, 0, 2046, 10000, &results); - VOLK_PROFILE(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_deinterleave_real_16i_a16, 0, 256, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 204600, 3000, &results); - VOLK_PROFILE(volk_8ic_deinterleave_real_8i_a16, 0, 0, 204600, 10000, &results); - VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 204600, 400, &results); - 
VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 204600, 400, &results); - VOLK_PROFILE(volk_8i_convert_16i_a16, 0, 0, 204600, 20000, &results); - VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results); -*/ - VOLK_PROFILE(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 204600, 2000, &results); - VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results); - - std::ofstream config; - std::cout << "filename: " << config_path << std::endl; - config.open(config_path.c_str()); - - config << "\ -#this file is generated by volk_profile.\n\ -#the function name is followed by the preferred architecture.\n\ -"; - - BOOST_FOREACH(std::string result, results) { - config << result << std::endl; - } - config.close(); - - load_preferences(); -} diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index f505abeb6..14f1789da 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -7,6 +7,8 @@ #include #endif +//this should be used by the profiler app to find the path as well +//possibly all this stuff should go in a separate volk_prefs.cc void get_config_path(char *path) { const char *suffix = "/.gnuradio/volk_config"; memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); @@ -25,8 +27,19 @@ void get_config_path(char *path) { * * also means you don't have to also spec the fn name in qa_utils.h/c, you can * pass it in along with the func_desc + * + * your prefs reader should also have a prefs writer which takes a vector of prefs and writes them + * then your profiler can just write the prefs by passing that out * */ + +struct volk_arch_pref { + const char *name; + const char *arch; +}; + +//if we end up with more this will have to use realloc +struct volk_arch_pref volk_arch_prefs[400]; void load_preferences(void) { static int prefs_loaded = 0; @@ -56,7 +69,7 @@ void load_preferences(void) { prefs_loaded = 1; } -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch) { 
+unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, const char* name, unsigned int arch) { int i = 1; unsigned int best_val = 0; for(; i < n_archs; ++i) { diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index 37a0fbc46..ba248aa59 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,13 +5,7 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, unsigned int arch); - -//////////////////////////////////////////////////////////////////////// -//get path to volk_config profiling info -//////////////////////////////////////////////////////////////////////// -void get_config_path(char *); -void load_preferences(void); //FIXME DEBUG shouldn't be exported +unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } -- cgit From 30fdc38d20d4e38908059b6e351c550de5741621 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 14:21:17 -0700 Subject: Volk: profiling works. loads prefs on init. volk_rank_archs looks in prefs first. 
--- volk/lib/CMakeLists.txt | 1 + volk/lib/volk_rank_archs.c | 92 ++++++++++++++-------------------------------- volk/lib/volk_rank_archs.h | 3 +- 3 files changed, 30 insertions(+), 66 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index dfb1d4219..df0a224db 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -176,6 +176,7 @@ INCLUDE_DIRECTORIES( SET(volk_sources ${CMAKE_CURRENT_BINARY_DIR}/volk.c ${CMAKE_CURRENT_BINARY_DIR}/volk_cpu.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c ) diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 14f1789da..1b75af8f4 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -1,78 +1,40 @@ #include +#include #include #include #include -#if defined(_WIN32) -#include -#endif - -//this should be used by the profiler app to find the path as well -//possibly all this stuff should go in a separate volk_prefs.cc -void get_config_path(char *path) { - const char *suffix = "/.gnuradio/volk_config"; - memcpy(path, getenv("HOME"), strlen(getenv("HOME"))+1); - strcat(path, suffix); -} - -/* - * ok so volk stuff has to be loaded piecemeal, and to avoid reading - * the whole config file in at startup we should probably create a static - * prefs struct that can be read in by rank_archs with minimal modification. - * this makes rank_archs slower and load_preferences more complex, but - * we don't have to export load_preferences and we don't have to include volk.h. - * means we need to pass the name into rank_archs, though - * problem is that names don't appear anywhere in the volk function descriptor. - * so we have to modify things to include the name in the descriptor. 
- * - * also means you don't have to also spec the fn name in qa_utils.h/c, you can - * pass it in along with the func_desc - * - * your prefs reader should also have a prefs writer which takes a vector of prefs and writes them - * then your profiler can just write the prefs by passing that out - * - */ - -struct volk_arch_pref { - const char *name; - const char *arch; -}; - -//if we end up with more this will have to use realloc -struct volk_arch_pref volk_arch_prefs[400]; - -void load_preferences(void) { - static int prefs_loaded = 0; - FILE *config_file; - char path[512], line[512], function[256], arch[64]; - - if(prefs_loaded) return; - - int n_arch_preferences = 0; - - //get the config path - get_config_path(path); - config_file = fopen(path, "r"); - if(!config_file) return; //no prefs found - - while(fgets(line, 512, config_file) != NULL) { - if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { - printf("func: %s, arch: %s\n", function, arch); - //we have a function and we have an arch, let's set it - n_arch_preferences++; +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name) { + int i; + for(i=0; i arch_defs[best_val + 1]) ? 
i-1 : best_val; } diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h index ba248aa59..546240d2c 100644 --- a/volk/lib/volk_rank_archs.h +++ b/volk/lib/volk_rank_archs.h @@ -5,7 +5,8 @@ extern "C" { #endif -unsigned int volk_rank_archs(const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); +unsigned int get_index(const char *indices[], unsigned int n_archs, const char *arch_name); +unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsigned int n_archs, const char *name, unsigned int arch); #ifdef __cplusplus } -- cgit From c21132e07100c62182a27a8e282cb72463dd2963 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 14:50:48 -0700 Subject: Volk: actually return the preferred arch --- volk/lib/volk_rank_archs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c index 1b75af8f4..e10433fd0 100644 --- a/volk/lib/volk_rank_archs.c +++ b/volk/lib/volk_rank_archs.c @@ -30,7 +30,7 @@ unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsign //now look for the function name in the prefs list for(i=0; i < n_arch_prefs; i++) { if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it - best_val = get_index(indices, n_archs, volk_arch_prefs[i].arch); + return get_index(indices, n_archs, volk_arch_prefs[i].arch); } } -- cgit From a1b9b5c16c53bedfe8ebab39055a36dee387a9a4 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 15:07:31 -0700 Subject: Volk: forgot to add prefs.c/h to git... 
--- volk/lib/volk_prefs.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 volk/lib/volk_prefs.c (limited to 'volk/lib') diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c new file mode 100644 index 000000000..bd15c130e --- /dev/null +++ b/volk/lib/volk_prefs.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +//#if defined(_WIN32) +//#include +//#endif + +void get_config_path(char *path) { + const char *suffix = "/.gnuradio/volk_config"; + strcpy(path, getenv("HOME")); + strcat(path, suffix); +} + +//passing by reference in C can suck my balls +int load_preferences(struct volk_arch_pref **prefs) { + FILE *config_file; + char path[512], line[512], function[128], arch[32]; + int n_arch_prefs = 0; + struct volk_arch_pref *t_pref; + + //get the config path + get_config_path(path); + config_file = fopen(path, "r"); + if(!config_file) return; //no prefs found + + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + n_arch_prefs++; + } + } + + //now allocate the memory required for volk_arch_prefs + (*prefs) = (struct volk_arch_pref *) malloc(n_arch_prefs * sizeof(struct volk_arch_pref)); + t_pref = (*prefs); + + //reset the file pointer and write the prefs into volk_arch_prefs + rewind(config_file); + while(fgets(line, 512, config_file) != NULL) { + if(sscanf(line, "%s %s", function, arch) == 2 && !strncmp(function, "volk_", 5)) { + strncpy(t_pref->name, function, 128); + strncpy(t_pref->arch, arch, 32); + t_pref++; + } + } + fclose(config_file); + return n_arch_prefs; +} -- cgit From b0e781a55387e02ef8126219ccfe8b3c48a838f5 Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 15:39:56 -0700 Subject: Volk: move configuration into ~/.volk instead of ~/.gnuradio, add ability to create dir in profiler if not exist --- volk/lib/volk_prefs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'volk/lib') diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c index bd15c130e..ebfe3bc40 100644 --- a/volk/lib/volk_prefs.c +++ b/volk/lib/volk_prefs.c @@ -8,7 +8,7 @@ //#endif void get_config_path(char *path) { - const char *suffix = "/.gnuradio/volk_config"; + const char *suffix = "/.volk/volk_config"; strcpy(path, getenv("HOME")); strcat(path, suffix); } -- cgit From bfb812a4ae8ec750d4452be7ce6b31d33de5796d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Thu, 12 May 2011 16:55:56 -0700 Subject: Volk: changed size of memory alignment in QA code to 32 for AVX support --- volk/lib/qa_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/lib') diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index 3eb1da1f1..db606a472 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -233,7 +233,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { class volk_qa_aligned_mem_pool{ public: - void *get_new(size_t size, size_t alignment = 16){ + void *get_new(size_t size, size_t alignment = 32){ _mems.push_back(std::vector<char>(size + alignment-1, 0)); size_t ptr = size_t(&_mems.back().front()); return (void *)((ptr + alignment-1) & ~(alignment-1)); -- cgit From c00901263795fe436e2d5aea7d2eb08d8bd81f64 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Thu, 12 May 2011 20:11:41 -0700 Subject: volk: added header implementation files to generation rule dependencies --- volk/lib/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index e8fe5cc58..f3ee2ab3d 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -141,13 +141,14 @@ SET(volk_gen_sources ${machine_sources} ) -#dependencies are all python and xml files +#dependencies are all python, xml, and header implementation files FILE(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) FILE(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) +FILE(GLOB h_files
${CMAKE_SOURCE_DIR}/include/volk/*.h) ADD_CUSTOM_COMMAND( OUTPUT ${volk_gen_sources} - DEPENDS ${xml_files} ${py_files} + DEPENDS ${xml_files} ${py_files} ${h_files} COMMAND ${PYTHON_EXECUTABLE} -B ${CMAKE_SOURCE_DIR}/gen/volk_register.py ${CMAKE_BINARY_DIR} -- cgit From 9bfe75fd7c6a7069db2d2a98195faabf6ba248e2 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Fri, 13 May 2011 13:58:01 -0700 Subject: volk: do not install library-only headers --- volk/lib/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'volk/lib') diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt index f3ee2ab3d..5dd41be0f 100644 --- a/volk/lib/CMakeLists.txt +++ b/volk/lib/CMakeLists.txt @@ -135,8 +135,8 @@ SET(volk_gen_sources ${CMAKE_BINARY_DIR}/lib/volk_cpu.c ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h ${CMAKE_BINARY_DIR}/lib/volk_environment_init.c - ${CMAKE_BINARY_DIR}/include/volk/volk_environment_init.h - ${CMAKE_BINARY_DIR}/include/volk/volk_machines.h + ${CMAKE_BINARY_DIR}/lib/volk_environment_init.h + ${CMAKE_BINARY_DIR}/lib/volk_machines.h ${CMAKE_BINARY_DIR}/lib/volk_machines.c ${machine_sources} ) @@ -193,6 +193,10 @@ ENDIF() ######################################################################## # Setup the volk sources list and library ######################################################################## +IF(NOT WIN32) + ADD_DEFINITIONS(-fvisibility=hidden) +ENDIF() + INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}/include -- cgit