diff options
Diffstat (limited to 'volk/tmpl')
-rw-r--r-- | volk/tmpl/volk.tmpl.c | 121 | ||||
-rw-r--r-- | volk/tmpl/volk.tmpl.h | 55 | ||||
-rw-r--r-- | volk/tmpl/volk_machine_xxx.tmpl.c | 30 | ||||
-rw-r--r-- | volk/tmpl/volk_machines.tmpl.h | 14 | ||||
-rw-r--r-- | volk/tmpl/volk_typedefs.tmpl.h | 2 |
5 files changed, 174 insertions, 48 deletions
diff --git a/volk/tmpl/volk.tmpl.c b/volk/tmpl/volk.tmpl.c index c3a1544ff..f915f157f 100644 --- a/volk/tmpl/volk.tmpl.c +++ b/volk/tmpl/volk.tmpl.c @@ -27,6 +27,10 @@ #include <volk/volk.h> #include <stdio.h> #include <string.h> +#include <assert.h> + +static size_t __alignment = 0; +static intptr_t __alignment_mask = 0; struct volk_machine *get_machine(void) { extern struct volk_machine *volk_machines[]; @@ -46,45 +50,118 @@ struct volk_machine *get_machine(void) { } } printf("Using Volk machine: %s\n", machine->name); + __alignment = machine->alignment; + __alignment_mask = (intptr_t)(__alignment-1); return machine; } } -unsigned int volk_get_alignment(void) { - return get_machine()->alignment; +size_t volk_get_alignment(void) +{ + get_machine(); //ensures alignment is set + return __alignment; +} + +bool volk_is_aligned(const void *ptr) +{ + return ((intptr_t)(ptr) & __alignment_mask) == 0; } +#define LV_HAVE_GENERIC +#define LV_HAVE_DISPATCHER + #for $kern in $kernels -void get_$(kern.name)($kern.arglist_namedefs) { - $kern.name = get_machine()->$(kern.name)_archs[volk_rank_archs( - get_machine()->$(kern.name)_indices, - get_machine()->$(kern.name)_arch_defs, - get_machine()->$(kern.name)_n_archs, - get_machine()->$(kern.name)_name, - volk_get_lvarch() - )]; +#if $kern.has_dispatcher +#include <volk/$(kern.name).h> //pulls in the dispatcher +#end if + +static inline void __$(kern.name)_d($kern.arglist_full) +{ + #if $kern.has_dispatcher + $(kern.name)_dispatcher($kern.arglist_names); + return; + #end if + + if (volk_is_aligned( + #set $num_open_parens = 0 + #for $arg_type, $arg_name in $kern.args + #if '*' in $arg_type + VOLK_OR_PTR($arg_name, + #set $num_open_parens += 1 + #end if + #end for + 0$(')'*$num_open_parens) + )){ + $(kern.name)_a($kern.arglist_names); + } + else{ + $(kern.name)_u($kern.arglist_names); + } +} + +static inline void __init_$(kern.name)(void) +{ + const char *name = get_machine()->$(kern.name)_name; + const char **impl_names = 
get_machine()->$(kern.name)_impl_names; + const int *impl_deps = get_machine()->$(kern.name)_impl_deps; + const bool *alignment = get_machine()->$(kern.name)_impl_alignment; + const size_t n_impls = get_machine()->$(kern.name)_n_impls; + const size_t index_a = volk_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/); + const size_t index_u = volk_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/); + $(kern.name)_a = get_machine()->$(kern.name)_impls[index_a]; + $(kern.name)_u = get_machine()->$(kern.name)_impls[index_u]; + + assert($(kern.name)_a); + assert($(kern.name)_u); + + $(kern.name) = &__$(kern.name)_d; +} + +static inline void __$(kern.name)_a($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)_a($kern.arglist_names); +} + +static inline void __$(kern.name)_u($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)_u($kern.arglist_names); +} + +static inline void __$(kern.name)($kern.arglist_full) +{ + __init_$(kern.name)(); $(kern.name)($kern.arglist_names); } -$kern.pname $kern.name = &get_$(kern.name); +$kern.pname $(kern.name)_a = &__$(kern.name)_a; +$kern.pname $(kern.name)_u = &__$(kern.name)_u; +$kern.pname $(kern.name) = &__$(kern.name); -void $(kern.name)_manual($kern.arglist_namedefs, const char* arch) { - const size_t index = get_index( - get_machine()->$(kern.name)_indices, - get_machine()->$(kern.name)_n_archs, - arch +void $(kern.name)_manual($kern.arglist_full, const char* impl_name) +{ + const int index = volk_get_index( + get_machine()->$(kern.name)_impl_names, + get_machine()->$(kern.name)_n_impls, + impl_name ); - get_machine()->$(kern.name)_archs[index]( + get_machine()->$(kern.name)_impls[index]( $kern.arglist_names ); } -struct volk_func_desc $(kern.name)_get_func_desc(void) { - struct volk_func_desc desc = { - get_machine()->$(kern.name)_indices, - get_machine()->$(kern.name)_arch_defs, - get_machine()->$(kern.name)_n_archs +volk_func_desc_t 
$(kern.name)_get_func_desc(void) { + const char **impl_names = get_machine()->$(kern.name)_impl_names; + const int *impl_deps = get_machine()->$(kern.name)_impl_deps; + const bool *alignment = get_machine()->$(kern.name)_impl_alignment; + const size_t n_impls = get_machine()->$(kern.name)_n_impls; + volk_func_desc_t desc = { + impl_names, + impl_deps, + alignment, + n_impls }; return desc; } diff --git a/volk/tmpl/volk.tmpl.h b/volk/tmpl/volk.tmpl.h index 161579e46..464b65598 100644 --- a/volk/tmpl/volk.tmpl.h +++ b/volk/tmpl/volk.tmpl.h @@ -27,20 +27,59 @@ #include <volk/volk_common.h> #include <volk/volk_complex.h> +#include <stdlib.h> +#include <stdbool.h> + __VOLK_DECL_BEGIN -struct volk_func_desc { - const char **indices; - const int *arch_defs; - const int n_archs; -}; +typedef struct volk_func_desc +{ + const char **impl_names; + const int *impl_deps; + const bool *impl_alignment; + const size_t n_impls; +} volk_func_desc_t; + +//! Get the machine alignment in bytes +VOLK_API size_t volk_get_alignment(void); + +/*! + * The VOLK_OR_PTR macro is a convenience macro + * for checking the alignment of a set of pointers. + * Example usage: + * volk_is_aligned(VOLK_OR_PTR(VOLK_OR_PTR(p0, p1), p2)) + */ +#define VOLK_OR_PTR(ptr0, ptr1) \ + (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1))) -VOLK_API unsigned int volk_get_alignment(void); +/*! + * Is the pointer on a machine alignment boundary? + * + * Note: for performance reasons, this function + * is not usable until another volk API call is made + * which will perform certain initialization tasks. + * + * \param ptr the pointer to some memory buffer + * \return 1 for alignment boundary, else 0 + */ +VOLK_API bool volk_is_aligned(const void *ptr); #for $kern in $kernels + +//! 
A function pointer to the dispatcher implementation extern VOLK_API $kern.pname $kern.name; -extern VOLK_API void $(kern.name)_manual($kern.arglist_namedefs, const char* arch); -extern VOLK_API struct volk_func_desc $(kern.name)_get_func_desc(void); + +//! A function pointer to the fastest aligned implementation +extern VOLK_API $kern.pname $(kern.name)_a; + +//! A function pointer to the fastest unaligned implementation +extern VOLK_API $kern.pname $(kern.name)_u; + +//! Call into a specific implementation given by name +extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* impl_name); + +//! Get description parameters for this kernel +extern VOLK_API volk_func_desc_t $(kern.name)_get_func_desc(void); #end for __VOLK_DECL_END diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c index e405bd693..68d7f3eba 100644 --- a/volk/tmpl/volk_machine_xxx.tmpl.c +++ b/volk/tmpl/volk_machine_xxx.tmpl.c @@ -44,18 +44,23 @@ $(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp #end def ######################################################################## -#def make_tag_str_list($tags) -{$(', '.join(['"%s"'%a for a in $tags]))}#slurp +#def make_impl_name_list($impls) +{$(', '.join(['"%s"'%i.name for i in $impls]))}#slurp #end def ######################################################################## -#def make_tag_have_list($deps) -{$(', '.join([' | '.join(['(1 << LV_%s)'%a.upper() for a in d]) for d in $deps]))}#slurp +#def make_impl_align_list($impls) +{$(', '.join(['true' if i.is_aligned else 'false' for i in $impls]))}#slurp #end def ######################################################################## -#def make_tag_kern_list($name, $tags) -{$(', '.join(['%s_%s'%($name, a) for a in $tags]))}#slurp +#def make_impl_deps_list($impls) +{$(', '.join([' | '.join(['(1 << LV_%s)'%d.upper() for d in i.deps]) for i in $impls]))}#slurp +#end def + 
+######################################################################## +#def make_impl_fcn_list($name, $impls) +{$(', '.join(['%s_%s'%($name, i.name) for i in $impls]))}#slurp #end def struct volk_machine volk_machine_$(this_machine.name) = { @@ -63,11 +68,12 @@ struct volk_machine volk_machine_$(this_machine.name) = { "$this_machine.name", $this_machine.alignment, #for $kern in $kernels - #set $taglist, $tagdeps = $kern.get_tags($arch_names) - "$kern.name", - $make_tag_str_list($taglist), - $make_tag_have_list($tagdeps), - $make_tag_kern_list($kern.name, $taglist), - $(len($taglist)), + #set $impls = $kern.get_impls($arch_names) + "$kern.name", ##//kernel name + $make_impl_name_list($impls), ##//list of kernel implementations by name + $make_impl_deps_list($impls), ##//list of arch dependencies per implementation + $make_impl_align_list($impls), ##//alignment required? for each implementation + $make_impl_fcn_list($kern.name, $impls), ##//pointer to each implementation + $(len($impls)), ##//number of implementations listed here #end for }; diff --git a/volk/tmpl/volk_machines.tmpl.h b/volk/tmpl/volk_machines.tmpl.h index b30e600ed..7e11b1079 100644 --- a/volk/tmpl/volk_machines.tmpl.h +++ b/volk/tmpl/volk_machines.tmpl.h @@ -25,18 +25,22 @@ #include <volk/volk_common.h> #include <volk/volk_typedefs.h> +#include <stdbool.h> +#include <stdlib.h> + __VOLK_DECL_BEGIN struct volk_machine { const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format) const char *name; - const unsigned int alignment; //the maximum byte alignment required for functions in this library + const size_t alignment; //the maximum byte alignment required for functions in this library #for $kern in $kernels const char *$(kern.name)_name; - const char *$(kern.name)_indices[$(len($archs))]; - const int $(kern.name)_arch_defs[$(len($archs))]; - const $(kern.pname) $(kern.name)_archs[$(len($archs))]; - const int $(kern.name)_n_archs; + const char 
*$(kern.name)_impl_names[$(len($archs))]; + const int $(kern.name)_impl_deps[$(len($archs))]; + const bool $(kern.name)_impl_alignment[$(len($archs))]; + const $(kern.pname) $(kern.name)_impls[$(len($archs))]; + const size_t $(kern.name)_n_impls; #end for }; diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h index 52a87242f..6f5426965 100644 --- a/volk/tmpl/volk_typedefs.tmpl.h +++ b/volk/tmpl/volk_typedefs.tmpl.h @@ -26,7 +26,7 @@ #include <volk/volk_complex.h> #for $kern in $kernels -typedef $kern.rettype (*$(kern.pname))($kern.arglist_defs); +typedef void (*$(kern.pname))($kern.arglist_types); #end for #endif /*INCLUDED_VOLK_TYPEDEFS*/ |