From e91983371498cfd87d3f4673d6e5874c9ba03ab9 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 15 Apr 2012 13:56:10 -0700 Subject: volk: work on template stuff --- volk/tmpl/volk_cpu.tmpl.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++ volk/tmpl/volk_cpu.tmpl.h | 42 ++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 volk/tmpl/volk_cpu.tmpl.c create mode 100644 volk/tmpl/volk_cpu.tmpl.h (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c new file mode 100644 index 000000000..dc24309f7 --- /dev/null +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -0,0 +1,170 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#include +#include + +struct VOLK_CPU volk_cpu; + +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) +# define VOLK_CPU_x86 +#endif + +#if defined(VOLK_CPU_x86) + +//implement get cpuid for gcc compilers using a copy of cpuid.h +#if defined(__GNUC__) +#include +#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) + +//implement get cpuid for MSVC compilers using __cpuid intrinsic +#elif defined(_MSC_VER) +#include +#define cpuid_x86(op, r) __cpuid(r, op) + +#else +#error "A get cpuid for volk is not available on this compiler..." +#endif + +static inline unsigned int cpuid_eax(unsigned int op) { + int regs[4]; + cpuid_x86 (op, regs); + return regs[0]; +} + +static inline unsigned int cpuid_ebx(unsigned int op) { + int regs[4]; + cpuid_x86 (op, regs); + return regs[1]; +} + +static inline unsigned int cpuid_ecx(unsigned int op) { + int regs[4]; + cpuid_x86 (op, regs); + return regs[2]; +} + +static inline unsigned int cpuid_edx(unsigned int op) { + int regs[4]; + cpuid_x86 (op, regs); + return regs[3]; +} +#endif + +#if defined(__arm__) && defined(__linux__) +#include +#include +#include +#define LOOK_FOR_NEON +#endif + +static int has_neon(void){ +#ifdef LOOK_FOR_NEON + FILE *auxvec_f; + unsigned long auxvec[2]; + unsigned int found_neon = 0; + auxvec_f = fopen("/proc/self/auxv", "rb"); + if(!auxvec_f) return 0; + + //so auxv is basically 32b of ID and 32b of value + //so it goes like this + while(!found_neon && auxvec_f) { + fread(auxvec, sizeof(unsigned long), 2, auxvec_f); + if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON)) + found_neon = 1; + } + + fclose(auxvec_f); + return found_neon; + +#else + return 0; +} + +static int has_ppc(void){ +#ifdef __PPC__ + return 1; +#else + return 0; +#endif +} + +#for $arch in $archs +static int i_can_has_$arch.name () { +######################################################################## + #if 
$arch.type == "x86" and $arch.no_test +#if defined(VOLK_CPU_x86) + return 1; +#else + return 0; +#endif +######################################################################## + #else if $arch.op == 1 +#if defined(VOLK_CPU_x86) + #set $op = hex($arch.op) + unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op); + return ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; +#else + return 0; +#endif +######################################################################## + #else if $arch.op == 0x80000001 +#if defined(VOLK_CPU_x86) + #set $op = hex($arch.op) + unsigned int extended_fct_count = cpuid_eax($op); + if (extended_fct_count < $op) + return $(arch.val)^1; + unsigned int extended_features = cpuid_e$(arch.reg)x ($op); + return ((extended_features >> $arch.shift) & 1) == $arch.val; +#else + return 0; +#endif +######################################################################## + #else if $arch.type == "powerpc" + return has_ppc(); +######################################################################## + #else if $arch.type == "arm" + return has_neon(); +######################################################################## + #else if $arch.type == "all" + return 1; +######################################################################## + #else ##$ + return 0; + #end if +} + +#end for + +void volk_cpu_init() { + #for $arch in $archs + volk_cpu.has_$arch.name = &i_can_has_$arch.name; + #end for +} + +unsigned int volk_get_lvarch() { + unsigned int retval = 0; + volk_cpu_init(); + #for $arch in $archs + retval += volk_cpu.has_$(arch.name)() << LV_$(arch.name.upper()); + #end for + return retval; +} diff --git a/volk/tmpl/volk_cpu.tmpl.h b/volk/tmpl/volk_cpu.tmpl.h new file mode 100644 index 000000000..4d66512e1 --- /dev/null +++ b/volk/tmpl/volk_cpu.tmpl.h @@ -0,0 +1,42 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_CPU_H +#define INCLUDED_VOLK_CPU_H + +#include + +__VOLK_DECL_BEGIN + +struct VOLK_CPU { + #for $arch in $archs + int (*has_$arch.name) (); + #end for +}; + +extern struct VOLK_CPU volk_cpu; + +void volk_cpu_init (); +unsigned int volk_get_lvarch (); + +__VOLK_DECL_END + +#endif /*INCLUDED_VOLK_CPU_H*/ -- cgit From eccc86fbb8aa0392307bfdf1bd802e4394868334 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 15 Apr 2012 15:38:14 -0700 Subject: volk: added kernel defs and typedefs --- volk/tmpl/volk_config_fixed.tmpl.h | 29 +++++++++++++++++++++++++++++ volk/tmpl/volk_typedefs.tmpl.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 volk/tmpl/volk_config_fixed.tmpl.h create mode 100644 volk/tmpl/volk_typedefs.tmpl.h (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_config_fixed.tmpl.h b/volk/tmpl/volk_config_fixed.tmpl.h new file mode 100644 index 000000000..e1c01ae77 --- /dev/null +++ b/volk/tmpl/volk_config_fixed.tmpl.h @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_CONFIG_FIXED_H +#define INCLUDED_VOLK_CONFIG_FIXED_H + +#for $i, $arch in enumerate($archs) +#define LV_$(arch.name.upper()) $i +#end for + +#endif /*INCLUDED_VOLK_CONFIG_FIXED*/ diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h new file mode 100644 index 000000000..947cb9ed5 --- /dev/null +++ b/volk/tmpl/volk_typedefs.tmpl.h @@ -0,0 +1,32 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef INCLUDED_VOLK_TYPEDEFS +#define INCLUDED_VOLK_TYPEDEFS + +#include +#include + +#for $kern in $kernels +typedef $kern.rettype (*$(kern.name.replace('volk_', 'p_')))($kern.arglist); +#end for + +#endif /*INCLUDED_VOLK_TYPEDEFS*/ -- cgit From 95e91b44d2ef3535129c0a402c51bc56cfd74d06 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 15 Apr 2012 17:32:38 -0700 Subject: volk: created other templates for runtime + machines --- volk/tmpl/volk.tmpl.c | 92 +++++++++++++++++++++++++++++++++++++++ volk/tmpl/volk.tmpl.h | 48 ++++++++++++++++++++ volk/tmpl/volk_machine_xxx.tmpl.c | 88 +++++++++++++++++++++++++++++++++++++ volk/tmpl/volk_machines.tmpl.c | 34 +++++++++++++++ volk/tmpl/volk_machines.tmpl.h | 51 ++++++++++++++++++++++ volk/tmpl/volk_typedefs.tmpl.h | 2 +- 6 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 volk/tmpl/volk.tmpl.c create mode 100644 volk/tmpl/volk.tmpl.h create mode 100644 volk/tmpl/volk_machine_xxx.tmpl.c create mode 100644 volk/tmpl/volk_machines.tmpl.c create mode 100644 volk/tmpl/volk_machines.tmpl.h (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk.tmpl.c b/volk/tmpl/volk.tmpl.c new file mode 100644 index 000000000..161f49a43 --- /dev/null +++ b/volk/tmpl/volk.tmpl.c @@ -0,0 +1,92 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. 
If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#include +#include "volk_machines.h" +#include +#include +#include "volk_rank_archs.h" +#include +#include +#include + +struct volk_machine *get_machine(void) { + extern struct volk_machine *volk_machines[]; + extern unsigned int n_volk_machines; + static struct volk_machine *machine = NULL; + + if(machine != NULL) return machine; + else { + unsigned int max_score = 0; + unsigned int i; + for(i=0; i<n_volk_machines; i++) { + if(!(volk_machines[i]->caps & (~volk_get_lvarch()))) { + if(volk_machines[i]->caps > max_score) { + max_score = volk_machines[i]->caps; + machine = volk_machines[i]; + } + } + } + printf("Using Volk machine: %s\n", machine->name); + return machine; + } +} + +unsigned int volk_get_alignment(void) { + return get_machine()->alignment; +} + +#for $kern in $kernels + +void get_$(kern.name)($kern.arglist_defs) { + $kern.name = get_machine()->$(kern.name)_archs[volk_rank_archs( + get_machine()->$(kern.name)_indices, + get_machine()->$(kern.name)_arch_defs, + get_machine()->$(kern.name)_n_archs, + get_machine()->$(kern.name)_name, + volk_get_lvarch() + )]; + $(kern.name)($kern.arglist_names); +} + +$kern.pname $kern.name = &get_$(kern.name); + +void $(kern.name)_manual($kern.arglist_defs, const char* arch) { + const size_t index = get_index( + get_machine()->$(kern.name)_indices, + get_machine()->$(kern.name)_n_archs, + arch + ); + get_machine()->$(kern.name)_archs[index]( + $kern.arglist_names + ); +} + +struct volk_func_desc volk_32f_x2_add_32f_a_get_func_desc(void) { + struct volk_func_desc desc = { + get_machine()->$(kern.name)_indices, + get_machine()->$(kern.name)_arch_defs, + get_machine()->$(kern.name)_n_archs + }; + return desc; +} + +#end for diff --git a/volk/tmpl/volk.tmpl.h b/volk/tmpl/volk.tmpl.h new file mode 100644 index 000000000..161579e46 --- /dev/null +++ b/volk/tmpl/volk.tmpl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc.
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_RUNTIME +#define INCLUDED_VOLK_RUNTIME + +#include +#include +#include +#include + +__VOLK_DECL_BEGIN + +struct volk_func_desc { + const char **indices; + const int *arch_defs; + const int n_archs; +}; + +VOLK_API unsigned int volk_get_alignment(void); + +#for $kern in $kernels +extern VOLK_API $kern.pname $kern.name; +extern VOLK_API void $(kern.name)_manual($kern.arglist_namedefs, const char* arch); +extern VOLK_API struct volk_func_desc $(kern.name)_get_func_desc(void); +#end for + +__VOLK_DECL_END + +#endif /*INCLUDED_VOLK_RUNTIME*/ diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c new file mode 100644 index 000000000..57e652e4c --- /dev/null +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -0,0 +1,88 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#set $this_machine = $machine_dict[$which] + +#for $arch in $this_machine.archs +#define LV_HAVE_$(arch.name.upper()) 1 +#end for + +#include +#include "volk_machines.h" +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#for $kern in $kernels +#include +#end for + +######################################################################## +#def make_arch_have_list($archs) +$(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp +#end def + +######################################################################## +#def make_tag_str_list($tags) +{$(', '.join(['"%s"'%a for a in $tags]))}#slurp +#end def + +######################################################################## +#def make_tag_have_list($deps) +{$(', '.join([' | '.join(['(1 << LV_%s)'%a.upper() for a in d]) for d in $deps]))}#slurp +#end def + +######################################################################## +#def make_tag_kern_list($name, $tags) +{$(', '.join(['%s_%s'%($name, a) for a in $tags]))}#slurp +#end def + +#ifdef LV_HAVE_ORC +struct volk_machine volk_machine_generic = { + $make_arch_have_list($this_machine.archs) | (1 << LV_ORC), + "$this_machine.name", + $this_machine.alignment, + #for $kern in $kernels + "$kern.name", + $make_tag_str_list($kern.taglist), + $make_tag_have_list($kern.tagdeps), + $make_tag_kern_list($kern.name, $kern.taglist), + $(len($kern.taglist)), + #end for +}; + +#else +struct volk_machine volk_machine_generic = { + 
$make_arch_have_list($this_machine.archs), + "$this_machine.name", + $this_machine.alignment, + #for $kern in $kernels + "$kern.name", + $make_tag_str_list($kern.taglist), + $make_tag_have_list($kern.tagdeps), + $make_tag_kern_list($kern.name, $kern.taglist), + $(len($kern.taglist)), + #end for +}; + +#endif diff --git a/volk/tmpl/volk_machines.tmpl.c b/volk/tmpl/volk_machines.tmpl.c new file mode 100644 index 000000000..57dd03c98 --- /dev/null +++ b/volk/tmpl/volk_machines.tmpl.c @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#include +#include +#include "volk_machines.h" + +struct volk_machine *volk_machines[] = { +#for $machine in $machines +#ifdef LV_MACHINE_$(machine.name.upper()) +&volk_machine_$(machine.name), +#endif +#end for +}; + +unsigned int n_volk_machines = sizeof(volk_machines)/sizeof(*volk_machines); diff --git a/volk/tmpl/volk_machines.tmpl.h b/volk/tmpl/volk_machines.tmpl.h new file mode 100644 index 000000000..b30e600ed --- /dev/null +++ b/volk/tmpl/volk_machines.tmpl.h @@ -0,0 +1,51 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_LIBVOLK_MACHINES_H +#define INCLUDED_LIBVOLK_MACHINES_H + +#include +#include + +__VOLK_DECL_BEGIN + +struct volk_machine { + const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format) + const char *name; + const unsigned int alignment; //the maximum byte alignment required for functions in this library + #for $kern in $kernels + const char *$(kern.name)_name; + const char *$(kern.name)_indices[$(len($archs))]; + const int $(kern.name)_arch_defs[$(len($archs))]; + const $(kern.pname) $(kern.name)_archs[$(len($archs))]; + const int $(kern.name)_n_archs; + #end for +}; + +#for $machine in $machines +#ifdef LV_MACHINE_$(machine.name.upper()) +extern struct volk_machine volk_machine_$(machine.name); +#endif +#end for + +__VOLK_DECL_END + +#endif //INCLUDED_LIBVOLK_MACHINES_H diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h index 947cb9ed5..2577df14e 100644 --- a/volk/tmpl/volk_typedefs.tmpl.h +++ b/volk/tmpl/volk_typedefs.tmpl.h @@ -26,7 +26,7 @@ #include #for $kern in $kernels -typedef $kern.rettype (*$(kern.name.replace('volk_', 'p_')))($kern.arglist); +typedef $kern.rettype (*$(kern.pname))($kern.arglist); #end for 
#endif /*INCLUDED_VOLK_TYPEDEFS*/ -- cgit From 37f9a62fd45ece1e6a92769fbb1798403c86ba9b Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 15 Apr 2012 19:11:52 -0700 Subject: volk: working build w/ cmakelists --- volk/tmpl/volk.tmpl.c | 6 +++--- volk/tmpl/volk_cpu.tmpl.c | 3 ++- volk/tmpl/volk_machine_xxx.tmpl.c | 23 +++++++++++++---------- volk/tmpl/volk_typedefs.tmpl.h | 2 +- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk.tmpl.c b/volk/tmpl/volk.tmpl.c index 161f49a43..c3a1544ff 100644 --- a/volk/tmpl/volk.tmpl.c +++ b/volk/tmpl/volk.tmpl.c @@ -56,7 +56,7 @@ unsigned int volk_get_alignment(void) { #for $kern in $kernels -void get_$(kern.name)($kern.arglist_defs) { +void get_$(kern.name)($kern.arglist_namedefs) { $kern.name = get_machine()->$(kern.name)_archs[volk_rank_archs( get_machine()->$(kern.name)_indices, get_machine()->$(kern.name)_arch_defs, @@ -69,7 +69,7 @@ void get_$(kern.name)($kern.arglist_defs) { $kern.pname $kern.name = &get_$(kern.name); -void $(kern.name)_manual($kern.arglist_defs, const char* arch) { +void $(kern.name)_manual($kern.arglist_namedefs, const char* arch) { const size_t index = get_index( get_machine()->$(kern.name)_indices, get_machine()->$(kern.name)_n_archs, @@ -80,7 +80,7 @@ void $(kern.name)_manual($kern.arglist_defs, const char* arch) { ); } -struct volk_func_desc volk_32f_x2_add_32f_a_get_func_desc(void) { +struct volk_func_desc $(kern.name)_get_func_desc(void) { struct volk_func_desc desc = { get_machine()->$(kern.name)_indices, get_machine()->$(kern.name)_arch_defs, diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index dc24309f7..c278afc2e 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -77,7 +77,7 @@ static inline unsigned int cpuid_edx(unsigned int op) { #endif static int has_neon(void){ -#ifdef LOOK_FOR_NEON +#if defined(LOOK_FOR_NEON) FILE *auxvec_f; unsigned long auxvec[2]; unsigned int found_neon = 0; @@ -97,6 +97,7 
@@ static int has_neon(void){ #else return 0; +#endif } static int has_ppc(void){ diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c index 57e652e4c..87204ee99 100644 --- a/volk/tmpl/volk_machine_xxx.tmpl.c +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -20,6 +20,7 @@ */ #set $this_machine = $machine_dict[$which] +#set $arch_names = map(lambda a: a.name, $this_machine.archs) #for $arch in $this_machine.archs #define LV_HAVE_$(arch.name.upper()) 1 @@ -58,30 +59,32 @@ $(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp #end def #ifdef LV_HAVE_ORC -struct volk_machine volk_machine_generic = { +struct volk_machine volk_machine_$(this_machine.name) = { $make_arch_have_list($this_machine.archs) | (1 << LV_ORC), "$this_machine.name", $this_machine.alignment, #for $kern in $kernels + #set $taglist, $tagdeps = $kern.get_tags($arch_names + ["orc"]) "$kern.name", - $make_tag_str_list($kern.taglist), - $make_tag_have_list($kern.tagdeps), - $make_tag_kern_list($kern.name, $kern.taglist), - $(len($kern.taglist)), + $make_tag_str_list($taglist), + $make_tag_have_list($tagdeps), + $make_tag_kern_list($kern.name, $taglist), + $(len($taglist)), #end for }; #else -struct volk_machine volk_machine_generic = { +struct volk_machine volk_machine_$(this_machine.name) = { $make_arch_have_list($this_machine.archs), "$this_machine.name", $this_machine.alignment, #for $kern in $kernels + #set $taglist, $tagdeps = $kern.get_tags($arch_names) "$kern.name", - $make_tag_str_list($kern.taglist), - $make_tag_have_list($kern.tagdeps), - $make_tag_kern_list($kern.name, $kern.taglist), - $(len($kern.taglist)), + $make_tag_str_list($taglist), + $make_tag_have_list($tagdeps), + $make_tag_kern_list($kern.name, $taglist), + $(len($taglist)), #end for }; diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h index 2577df14e..52a87242f 100644 --- a/volk/tmpl/volk_typedefs.tmpl.h +++ b/volk/tmpl/volk_typedefs.tmpl.h @@ -26,7 +26,7 @@ #include 
#for $kern in $kernels -typedef $kern.rettype (*$(kern.pname))($kern.arglist); +typedef $kern.rettype (*$(kern.pname))($kern.arglist_defs); #end for #endif /*INCLUDED_VOLK_TYPEDEFS*/ -- cgit From 3af0f815ae3442dacdac78acf238b277f472c404 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 00:29:26 -0700 Subject: volk: added compile utils and cleanup cmakelists --- volk/tmpl/volk_cpu.tmpl.c | 32 ++++++++++++++++++-------------- volk/tmpl/volk_machine_xxx.tmpl.c | 2 +- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index c278afc2e..7fe7036e2 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -25,23 +25,27 @@ struct VOLK_CPU volk_cpu; #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) -# define VOLK_CPU_x86 + #define VOLK_CPU_x86 #endif #if defined(VOLK_CPU_x86) -//implement get cpuid for gcc compilers using a copy of cpuid.h +//implement get cpuid for gcc compilers using a system or local copy of cpuid.h #if defined(__GNUC__) -#include <cpuid.h> -#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) + #if defined(HAVE_CPUID_H) + #include <cpuid.h> + #else + #include "gcc_x86_cpuid.h" + #endif + #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) //implement get cpuid for MSVC compilers using __cpuid intrinsic -#elif defined(_MSC_VER) -#include <intrin.h> -#define cpuid_x86(op, r) __cpuid(r, op) +#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) + #include <intrin.h> + #define cpuid_x86(op, r) __cpuid(r, op) #else -#error "A get cpuid for volk is not available on this compiler..." + #error "A get cpuid for volk is not available on this compiler..."
#endif static inline unsigned int cpuid_eax(unsigned int op) { @@ -69,15 +73,16 @@ static inline unsigned int cpuid_edx(unsigned int op) { } #endif +//neon detection is linux specific #if defined(__arm__) && defined(__linux__) -#include -#include -#include -#define LOOK_FOR_NEON + #include + #include + #include + #define VOLK_CPU_ARM #endif static int has_neon(void){ -#if defined(LOOK_FOR_NEON) +#if defined(VOLK_CPU_ARM) FILE *auxvec_f; unsigned long auxvec[2]; unsigned int found_neon = 0; @@ -94,7 +99,6 @@ static int has_neon(void){ fclose(auxvec_f); return found_neon; - #else return 0; #endif diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c index 87204ee99..023eea502 100644 --- a/volk/tmpl/volk_machine_xxx.tmpl.c +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -20,7 +20,7 @@ */ #set $this_machine = $machine_dict[$which] -#set $arch_names = map(lambda a: a.name, $this_machine.archs) +#set $arch_names = $this_machine.arch_names #for $arch in $this_machine.archs #define LV_HAVE_$(arch.name.upper()) 1 -- cgit From 25a3690954d80819fe59e179e4675c5cdf81a347 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 00:41:16 -0700 Subject: volk: make orc a normal arch with overrule --- volk/tmpl/volk_machine_xxx.tmpl.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c index 023eea502..1f6a77501 100644 --- a/volk/tmpl/volk_machine_xxx.tmpl.c +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -58,22 +58,6 @@ $(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp {$(', '.join(['%s_%s'%($name, a) for a in $tags]))}#slurp #end def -#ifdef LV_HAVE_ORC -struct volk_machine volk_machine_$(this_machine.name) = { - $make_arch_have_list($this_machine.archs) | (1 << LV_ORC), - "$this_machine.name", - $this_machine.alignment, - #for $kern in $kernels - #set $taglist, $tagdeps = $kern.get_tags($arch_names + ["orc"]) - "$kern.name", - 
$make_tag_str_list($taglist), - $make_tag_have_list($tagdeps), - $make_tag_kern_list($kern.name, $taglist), - $(len($taglist)), - #end for -}; - -#else struct volk_machine volk_machine_$(this_machine.name) = { $make_arch_have_list($this_machine.archs), "$this_machine.name", @@ -87,5 +71,3 @@ struct volk_machine volk_machine_$(this_machine.name) = { $(len($taglist)), #end for }; - -#endif -- cgit From 4e86843aa236c4fabac35f236f22d16cd8fea99b Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 09:18:52 -0700 Subject: volk: python checks and build system stuff --- volk/tmpl/volk_cpu.tmpl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index 7fe7036e2..ff27a7f96 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -21,6 +21,7 @@ #include #include +#include struct VOLK_CPU volk_cpu; -- cgit From 0faf0107e38e93bc3fddf8296449a1439978bec1 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 13:38:44 -0700 Subject: volk: updated build system for avx checking support updated copy of cpuid.h with the latest from gcc 4.6 --- volk/tmpl/volk_cpu.tmpl.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index ff27a7f96..58b2cfbd2 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -40,11 +40,28 @@ struct VOLK_CPU volk_cpu; #endif #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) + /* Return Intel AVX extended CPU capabilities register. + * This function will bomb on non-AVX-capable machines, so + * check for AVX capability before executing. 
+ */ + static inline unsigned int __xgetbv(void) + { + unsigned int index, __eax, __edx; + __asm__ ("xgetbv" : "=a"(__eax), "=d"(__edx) : "c" (index)); + return __eax; + } + //implement get cpuid for MSVC compilers using __cpuid intrinsic #elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) #include #define cpuid_x86(op, r) __cpuid(r, op) + #if defined(_XCR_XFEATURE_ENABLED_MASK) + #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) + #else + #define __xgetbv() 0 + #endif + #else #error "A get cpuid for volk is not available on this compiler..." #endif @@ -72,6 +89,14 @@ static inline unsigned int cpuid_edx(unsigned int op) { cpuid_x86 (op, regs); return regs[3]; } + +static inline unsigned int xgetbv(void) { + //check to make sure that xgetbv is enabled in OS + int xgetbv_enabled = cpuid_ecx(1) >> 27 & 0x01; + if (xgetbv_enabled == 0) return 0; + return __xgetbv() & 0x6; +} + #endif //neon detection is linux specific @@ -114,7 +139,7 @@ static int has_ppc(void){ } #for $arch in $archs -static int i_can_has_$arch.name () { +static int i_can_has_$arch.name (void) { ######################################################################## #if $arch.type == "x86" and $arch.no_test #if defined(VOLK_CPU_x86) @@ -127,7 +152,11 @@ static int i_can_has_$arch.name () { #if defined(VOLK_CPU_x86) #set $op = hex($arch.op) unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op); - return ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; + unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; + #if $arch.check + if ($(arch.check)() == 0) return 0; + #end if + return hwcap; #else return 0; #endif -- cgit From 63809a2c71ea1d50aca6165a73b13fee0eae1d84 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 15:14:06 -0700 Subject: volk: fix for cpuid_eax check with hardcoded values --- volk/tmpl/volk_cpu.tmpl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c 
index 58b2cfbd2..1bd1ad211 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -164,8 +164,8 @@ static int i_can_has_$arch.name (void) { #else if $arch.op == 0x80000001 #if defined(VOLK_CPU_x86) #set $op = hex($arch.op) - unsigned int extended_fct_count = cpuid_eax($op); - if (extended_fct_count < $op) + unsigned int extended_fct_count = cpuid_eax(0x80000000); + if (extended_fct_count < 0x80000001) return $(arch.val)^1; unsigned int extended_features = cpuid_e$(arch.reg)x ($op); return ((extended_features >> $arch.shift) & 1) == $arch.val; -- cgit From 0595b7f2283e0aa1cdebefdac2d3a2702324727d Mon Sep 17 00:00:00 2001 From: Nick Foster Date: Mon, 16 Apr 2012 17:18:36 -0700 Subject: Volk: redo the archs.xml language to make checks generic. no more "type", no more piles of #if crap in the template. --- volk/tmpl/volk_cpu.tmpl.c | 96 +++++++++++++---------------------------------- 1 file changed, 27 insertions(+), 69 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index 1bd1ad211..b050d8aea 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -64,40 +64,39 @@ struct VOLK_CPU volk_cpu; #else #error "A get cpuid for volk is not available on this compiler..." 
-#endif +#endif //defined(__GNUC__) -static inline unsigned int cpuid_eax(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[0]; -} +#endif //defined(VOLK_CPU_x86) -static inline unsigned int cpuid_ebx(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[1]; -} - -static inline unsigned int cpuid_ecx(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[2]; +static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) { +#if defined(VOLK_CPU_x86) + unsigned int regs[4]; + cpuid_x86(op, regs); + return regs[reg] >> bit & 0x01; +#else + return 0; +#endif } -static inline unsigned int cpuid_edx(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[3]; +static inline unsigned int check_extended_cpuid(unsigned int val) { +#if defined(VOLK_CPU_x86) + unsigned int regs[4]; + cpuid_x86(0x80000000, regs); + return regs[0] >= val; +#else + return 0; +#endif } -static inline unsigned int xgetbv(void) { +static inline unsigned int get_avx_enabled(void) { +#if defined(VOLK_CPU_x86) //check to make sure that xgetbv is enabled in OS - int xgetbv_enabled = cpuid_ecx(1) >> 27 & 0x01; - if (xgetbv_enabled == 0) return 0; + if(!cpuid_x86_bit(2, 1, 27)) return 0; return __xgetbv() & 0x6; -} - +#else + return 0; #endif +} //neon detection is linux specific #if defined(__arm__) && defined(__linux__) @@ -140,51 +139,10 @@ static int has_ppc(void){ #for $arch in $archs static int i_can_has_$arch.name (void) { -######################################################################## - #if $arch.type == "x86" and $arch.no_test -#if defined(VOLK_CPU_x86) - return 1; -#else - return 0; -#endif -######################################################################## - #else if $arch.op == 1 -#if defined(VOLK_CPU_x86) - #set $op = hex($arch.op) - unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op); - unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; - #if 
$arch.check - if ($(arch.check)() == 0) return 0; - #end if - return hwcap; -#else - return 0; -#endif -######################################################################## - #else if $arch.op == 0x80000001 -#if defined(VOLK_CPU_x86) - #set $op = hex($arch.op) - unsigned int extended_fct_count = cpuid_eax(0x80000000); - if (extended_fct_count < 0x80000001) - return $(arch.val)^1; - unsigned int extended_features = cpuid_e$(arch.reg)x ($op); - return ((extended_features >> $arch.shift) & 1) == $arch.val; -#else - return 0; -#endif -######################################################################## - #else if $arch.type == "powerpc" - return has_ppc(); -######################################################################## - #else if $arch.type == "arm" - return has_neon(); -######################################################################## - #else if $arch.type == "all" + #for $check, $params in $arch.checks + if ($(check)($(', '.join($params))) == 0) return 0; + #end for return 1; -######################################################################## - #else ##$ - return 0; - #end if } #end for -- cgit From 5a3d372c027237dad1b1d8bc55c73f3217509b8f Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 17:56:58 -0700 Subject: volk: fix msvc __cpuid pointer cast --- volk/tmpl/volk_cpu.tmpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index b050d8aea..94895a76f 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -54,7 +54,7 @@ struct VOLK_CPU volk_cpu; //implement get cpuid for MSVC compilers using __cpuid intrinsic #elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) #include - #define cpuid_x86(op, r) __cpuid(r, op) + #define cpuid_x86(op, r) __cpuid(((int*)r), op) #if defined(_XCR_XFEATURE_ENABLED_MASK) #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) -- cgit From 0b117624e09643b7615316d3fa4741e2004529fa Mon 
Sep 17 00:00:00 2001 From: Josh Blum Date: Mon, 16 Apr 2012 18:39:29 -0700 Subject: volk: move avx cpuid_x86_bit check in archs.xml --- volk/tmpl/volk_cpu.tmpl.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index 94895a76f..e0a0e91de 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -90,8 +90,6 @@ static inline unsigned int check_extended_cpuid(unsigned int val) { static inline unsigned int get_avx_enabled(void) { #if defined(VOLK_CPU_x86) - //check to make sure that xgetbv is enabled in OS - if(!cpuid_x86_bit(2, 1, 27)) return 0; return __xgetbv() & 0x6; #else return 0; -- cgit From 283b6e911517313597756a9c3acd966c2dfe77e4 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Wed, 18 Apr 2012 11:16:59 -0700 Subject: volk: added gcc version check to xgetbv Reference https://code.google.com/p/pcsx2/issues/detail?id=1195 --- volk/tmpl/volk_cpu.tmpl.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index e0a0e91de..b312c4485 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -44,12 +44,16 @@ struct VOLK_CPU volk_cpu; * This function will bomb on non-AVX-capable machines, so * check for AVX capability before executing. 
*/ - static inline unsigned int __xgetbv(void) - { - unsigned int index, __eax, __edx; - __asm__ ("xgetbv" : "=a"(__eax), "=d"(__edx) : "c" (index)); - return __eax; + #if defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 4) + static inline unsigned long long _xgetbv(unsigned int index){ + unsigned int eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((unsigned long long)edx << 32) | eax; } + #define __xgetbv() _xgetbv(0) + #else + #define __xgetbv() 0 + #endif //implement get cpuid for MSVC compilers using __cpuid intrinsic #elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) -- cgit From 3d913cde5286c77ae4805bf7511b59190f2266dd Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Wed, 18 Apr 2012 13:33:07 -0700 Subject: volk: gcc version check without __GNUC_PREREQ --- volk/tmpl/volk_cpu.tmpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index b312c4485..1aa36d2bf 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -44,7 +44,7 @@ struct VOLK_CPU volk_cpu; * This function will bomb on non-AVX-capable machines, so * check for AVX capability before executing. 
*/ - #if defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 4) + #if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 static inline unsigned long long _xgetbv(unsigned int index){ unsigned int eax, edx; __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); -- cgit From f51c4dc7e5e8bab60f9431c51159f9e35907fbb7 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Wed, 18 Apr 2012 15:17:55 -0700 Subject: volk: added set_float_rounding to volk_cpu_init --- volk/tmpl/volk_cpu.tmpl.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index 1aa36d2bf..81fc679cb 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -149,10 +149,29 @@ static int i_can_has_$arch.name (void) { #end for +#if defined(HAVE_FENV_H) + #include + static inline void set_float_rounding(void){ + fesetround(FE_TONEAREST); + } +#elif defined(_MSC_VER) + #include + static inline void set_float_rounding(void){ + unsigned int cwrd; + _controlfp_s(&cwrd, 0, 0); + _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC); + } +#else + static inline void set_float_rounding(void){ + //do nothing + } +#endif + void volk_cpu_init() { #for $arch in $archs volk_cpu.has_$arch.name = &i_can_has_$arch.name; #end for + set_float_rounding(); } unsigned int volk_get_lvarch() { -- cgit From bb61c325dabfc80bbd8404b2892575948b77496b Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Thu, 19 Apr 2012 00:50:18 -0700 Subject: volk: code simplification, overrule macro and python opts --- volk/tmpl/volk_machine_xxx.tmpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c index 1f6a77501..e405bd693 100644 --- a/volk/tmpl/volk_machine_xxx.tmpl.c +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -19,7 +19,7 @@ * Boston, MA 02110-1301, USA. 
*/ -#set $this_machine = $machine_dict[$which] +#set $this_machine = $machine_dict[$args[0]] #set $arch_names = $this_machine.arch_names #for $arch in $this_machine.archs -- cgit From e826097e09fdfb04d14bf87861646b88229db881 Mon Sep 17 00:00:00 2001 From: Josh Blum Date: Sun, 13 Jan 2013 13:51:46 -0800 Subject: gras: support changeset for 3.6.4 used volk from next branch cf5c930d89ac89ba5a0da4a616c88d3c37e018ae for grextras support (it uses the dispatcher) empty stubs for the gr_basic_block msg passing. This is going to be difficult to figure out. The alias stuff may or may not be related most qa pass, there seems to be some additional issues, will be working through them on further commits Conflicts: gnuradio-core/CMakeLists.txt gnuradio-core/src/lib/runtime/CMakeLists.txt gnuradio-core/src/lib/runtime/gr_block.cc gnuradio-core/src/lib/runtime/gr_block.h gnuradio-core/src/lib/runtime/gr_hier_block2.h gnuradio-core/src/lib/runtime/gr_top_block.h gnuradio-core/src/python/gnuradio/gr/__init__.py gr-audio/examples/c++/CMakeLists.txt gr-fcd/examples/c++/CMakeLists.txt grc/python/Port.py --- volk/tmpl/volk.tmpl.c | 121 +++++++++++++++++++++++++++++++------- volk/tmpl/volk.tmpl.h | 55 ++++++++++++++--- volk/tmpl/volk_machine_xxx.tmpl.c | 30 ++++++---- volk/tmpl/volk_machines.tmpl.h | 14 +++-- volk/tmpl/volk_typedefs.tmpl.h | 2 +- 5 files changed, 174 insertions(+), 48 deletions(-) (limited to 'volk/tmpl') diff --git a/volk/tmpl/volk.tmpl.c b/volk/tmpl/volk.tmpl.c index c3a1544ff..f915f157f 100644 --- a/volk/tmpl/volk.tmpl.c +++ b/volk/tmpl/volk.tmpl.c @@ -27,6 +27,10 @@ #include #include #include +#include + +static size_t __alignment = 0; +static intptr_t __alignment_mask = 0; struct volk_machine *get_machine(void) { extern struct volk_machine *volk_machines[]; @@ -46,45 +50,118 @@ struct volk_machine *get_machine(void) { } } printf("Using Volk machine: %s\n", machine->name); + __alignment = machine->alignment; + __alignment_mask = (intptr_t)(__alignment-1); return 
machine; } } -unsigned int volk_get_alignment(void) { - return get_machine()->alignment; +size_t volk_get_alignment(void) +{ + get_machine(); //ensures alignment is set + return __alignment; +} + +bool volk_is_aligned(const void *ptr) +{ + return ((intptr_t)(ptr) & __alignment_mask) == 0; } +#define LV_HAVE_GENERIC +#define LV_HAVE_DISPATCHER + #for $kern in $kernels -void get_$(kern.name)($kern.arglist_namedefs) { - $kern.name = get_machine()->$(kern.name)_archs[volk_rank_archs( - get_machine()->$(kern.name)_indices, - get_machine()->$(kern.name)_arch_defs, - get_machine()->$(kern.name)_n_archs, - get_machine()->$(kern.name)_name, - volk_get_lvarch() - )]; +#if $kern.has_dispatcher +#include //pulls in the dispatcher +#end if + +static inline void __$(kern.name)_d($kern.arglist_full) +{ + #if $kern.has_dispatcher + $(kern.name)_dispatcher($kern.arglist_names); + return; + #end if + + if (volk_is_aligned( + #set $num_open_parens = 0 + #for $arg_type, $arg_name in $kern.args + #if '*' in $arg_type + VOLK_OR_PTR($arg_name, + #set $num_open_parens += 1 + #end if + #end for + 0$(')'*$num_open_parens) + )){ + $(kern.name)_a($kern.arglist_names); + } + else{ + $(kern.name)_u($kern.arglist_names); + } +} + +static inline void __init_$(kern.name)(void) +{ + const char *name = get_machine()->$(kern.name)_name; + const char **impl_names = get_machine()->$(kern.name)_impl_names; + const int *impl_deps = get_machine()->$(kern.name)_impl_deps; + const bool *alignment = get_machine()->$(kern.name)_impl_alignment; + const size_t n_impls = get_machine()->$(kern.name)_n_impls; + const size_t index_a = volk_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/); + const size_t index_u = volk_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/); + $(kern.name)_a = get_machine()->$(kern.name)_impls[index_a]; + $(kern.name)_u = get_machine()->$(kern.name)_impls[index_u]; + + assert($(kern.name)_a); + assert($(kern.name)_u); + + 
$(kern.name) = &__$(kern.name)_d; +} + +static inline void __$(kern.name)_a($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)_a($kern.arglist_names); +} + +static inline void __$(kern.name)_u($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)_u($kern.arglist_names); +} + +static inline void __$(kern.name)($kern.arglist_full) +{ + __init_$(kern.name)(); $(kern.name)($kern.arglist_names); } -$kern.pname $kern.name = &get_$(kern.name); +$kern.pname $(kern.name)_a = &__$(kern.name)_a; +$kern.pname $(kern.name)_u = &__$(kern.name)_u; +$kern.pname $(kern.name) = &__$(kern.name); -void $(kern.name)_manual($kern.arglist_namedefs, const char* arch) { - const size_t index = get_index( - get_machine()->$(kern.name)_indices, - get_machine()->$(kern.name)_n_archs, - arch +void $(kern.name)_manual($kern.arglist_full, const char* impl_name) +{ + const int index = volk_get_index( + get_machine()->$(kern.name)_impl_names, + get_machine()->$(kern.name)_n_impls, + impl_name ); - get_machine()->$(kern.name)_archs[index]( + get_machine()->$(kern.name)_impls[index]( $kern.arglist_names ); } -struct volk_func_desc $(kern.name)_get_func_desc(void) { - struct volk_func_desc desc = { - get_machine()->$(kern.name)_indices, - get_machine()->$(kern.name)_arch_defs, - get_machine()->$(kern.name)_n_archs +volk_func_desc_t $(kern.name)_get_func_desc(void) { + const char **impl_names = get_machine()->$(kern.name)_impl_names; + const int *impl_deps = get_machine()->$(kern.name)_impl_deps; + const bool *alignment = get_machine()->$(kern.name)_impl_alignment; + const size_t n_impls = get_machine()->$(kern.name)_n_impls; + volk_func_desc_t desc = { + impl_names, + impl_deps, + alignment, + n_impls }; return desc; } diff --git a/volk/tmpl/volk.tmpl.h b/volk/tmpl/volk.tmpl.h index 161579e46..464b65598 100644 --- a/volk/tmpl/volk.tmpl.h +++ b/volk/tmpl/volk.tmpl.h @@ -27,20 +27,59 @@ #include #include +#include +#include + __VOLK_DECL_BEGIN -struct volk_func_desc { - const char 
**indices; - const int *arch_defs; - const int n_archs; -}; +typedef struct volk_func_desc +{ + const char **impl_names; + const int *impl_deps; + const bool *impl_alignment; + const size_t n_impls; +} volk_func_desc_t; + +//! Get the machine alignment in bytes +VOLK_API size_t volk_get_alignment(void); + +/*! + * The VOLK_OR_PTR macro is a convenience macro + * for checking the alignment of a set of pointers. + * Example usage: + * volk_is_aligned(VOLK_OR_PTR((VOLK_OR_PTR(p0, p1), p2))) + */ +#define VOLK_OR_PTR(ptr0, ptr1) \ + (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1))) -VOLK_API unsigned int volk_get_alignment(void); +/*! + * Is the pointer on a machine alignment boundary? + * + * Note: for performance reasons, this function + * is not usable until another volk API call is made + * which will perform certain initialization tasks. + * + * \param ptr the pointer to some memory buffer + * \return 1 for alignment boundary, else 0 + */ +VOLK_API bool volk_is_aligned(const void *ptr); #for $kern in $kernels + +//! A function pointer to the dispatcher implementation extern VOLK_API $kern.pname $kern.name; -extern VOLK_API void $(kern.name)_manual($kern.arglist_namedefs, const char* arch); -extern VOLK_API struct volk_func_desc $(kern.name)_get_func_desc(void); + +//! A function pointer to the fastest aligned implementation +extern VOLK_API $kern.pname $(kern.name)_a; + +//! A function pointer to the fastest unaligned implementation +extern VOLK_API $kern.pname $(kern.name)_u; + +//! Call into a specific implementation given by name +extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* impl_name); + +//! 
Get description paramaters for this kernel +extern VOLK_API volk_func_desc_t $(kern.name)_get_func_desc(void); #end for __VOLK_DECL_END diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c index e405bd693..68d7f3eba 100644 --- a/volk/tmpl/volk_machine_xxx.tmpl.c +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -44,18 +44,23 @@ $(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp #end def ######################################################################## -#def make_tag_str_list($tags) -{$(', '.join(['"%s"'%a for a in $tags]))}#slurp +#def make_impl_name_list($impls) +{$(', '.join(['"%s"'%i.name for i in $impls]))}#slurp #end def ######################################################################## -#def make_tag_have_list($deps) -{$(', '.join([' | '.join(['(1 << LV_%s)'%a.upper() for a in d]) for d in $deps]))}#slurp +#def make_impl_align_list($impls) +{$(', '.join(['true' if i.is_aligned else 'false' for i in $impls]))}#slurp #end def ######################################################################## -#def make_tag_kern_list($name, $tags) -{$(', '.join(['%s_%s'%($name, a) for a in $tags]))}#slurp +#def make_impl_deps_list($impls) +{$(', '.join([' | '.join(['(1 << LV_%s)'%d.upper() for d in i.deps]) for i in $impls]))}#slurp +#end def + +######################################################################## +#def make_impl_fcn_list($name, $impls) +{$(', '.join(['%s_%s'%($name, i.name) for i in $impls]))}#slurp #end def struct volk_machine volk_machine_$(this_machine.name) = { @@ -63,11 +68,12 @@ struct volk_machine volk_machine_$(this_machine.name) = { "$this_machine.name", $this_machine.alignment, #for $kern in $kernels - #set $taglist, $tagdeps = $kern.get_tags($arch_names) - "$kern.name", - $make_tag_str_list($taglist), - $make_tag_have_list($tagdeps), - $make_tag_kern_list($kern.name, $taglist), - $(len($taglist)), + #set $impls = $kern.get_impls($arch_names) + "$kern.name", ##//kernel name + 
$make_impl_name_list($impls), ##//list of kernel implementations by name + $make_impl_deps_list($impls), ##//list of arch dependencies per implementation + $make_impl_align_list($impls), ##//alignment required? for each implementation + $make_impl_fcn_list($kern.name, $impls), ##//pointer to each implementation + $(len($impls)), ##//number of implementations listed here #end for }; diff --git a/volk/tmpl/volk_machines.tmpl.h b/volk/tmpl/volk_machines.tmpl.h index b30e600ed..7e11b1079 100644 --- a/volk/tmpl/volk_machines.tmpl.h +++ b/volk/tmpl/volk_machines.tmpl.h @@ -25,18 +25,22 @@ #include #include +#include +#include + __VOLK_DECL_BEGIN struct volk_machine { const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format) const char *name; - const unsigned int alignment; //the maximum byte alignment required for functions in this library + const size_t alignment; //the maximum byte alignment required for functions in this library #for $kern in $kernels const char *$(kern.name)_name; - const char *$(kern.name)_indices[$(len($archs))]; - const int $(kern.name)_arch_defs[$(len($archs))]; - const $(kern.pname) $(kern.name)_archs[$(len($archs))]; - const int $(kern.name)_n_archs; + const char *$(kern.name)_impl_names[$(len($archs))]; + const int $(kern.name)_impl_deps[$(len($archs))]; + const bool $(kern.name)_impl_alignment[$(len($archs))]; + const $(kern.pname) $(kern.name)_impls[$(len($archs))]; + const size_t $(kern.name)_n_impls; #end for }; diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h index 52a87242f..6f5426965 100644 --- a/volk/tmpl/volk_typedefs.tmpl.h +++ b/volk/tmpl/volk_typedefs.tmpl.h @@ -26,7 +26,7 @@ #include #for $kern in $kernels -typedef $kern.rettype (*$(kern.pname))($kern.arglist_defs); +typedef void (*$(kern.pname))($kern.arglist_types); #end for #endif /*INCLUDED_VOLK_TYPEDEFS*/ -- cgit