summaryrefslogtreecommitdiff
path: root/volk/tmpl
diff options
context:
space:
mode:
Diffstat (limited to 'volk/tmpl')
-rw-r--r--volk/tmpl/volk.tmpl.c121
-rw-r--r--volk/tmpl/volk.tmpl.h55
-rw-r--r--volk/tmpl/volk_machine_xxx.tmpl.c30
-rw-r--r--volk/tmpl/volk_machines.tmpl.h14
-rw-r--r--volk/tmpl/volk_typedefs.tmpl.h2
5 files changed, 174 insertions, 48 deletions
diff --git a/volk/tmpl/volk.tmpl.c b/volk/tmpl/volk.tmpl.c
index c3a1544ff..f915f157f 100644
--- a/volk/tmpl/volk.tmpl.c
+++ b/volk/tmpl/volk.tmpl.c
@@ -27,6 +27,10 @@
#include <volk/volk.h>
#include <stdio.h>
#include <string.h>
+#include <assert.h>
+
+static size_t __alignment = 0;
+static intptr_t __alignment_mask = 0;
struct volk_machine *get_machine(void) {
extern struct volk_machine *volk_machines[];
@@ -46,45 +50,118 @@ struct volk_machine *get_machine(void) {
}
}
printf("Using Volk machine: %s\n", machine->name);
+ __alignment = machine->alignment;
+ __alignment_mask = (intptr_t)(__alignment-1);
return machine;
}
}
-unsigned int volk_get_alignment(void) {
- return get_machine()->alignment;
+size_t volk_get_alignment(void)
+{
+ get_machine(); //ensures alignment is set
+ return __alignment;
+}
+
+bool volk_is_aligned(const void *ptr)
+{
+ return ((intptr_t)(ptr) & __alignment_mask) == 0;
}
+#define LV_HAVE_GENERIC
+#define LV_HAVE_DISPATCHER
+
#for $kern in $kernels
-void get_$(kern.name)($kern.arglist_namedefs) {
- $kern.name = get_machine()->$(kern.name)_archs[volk_rank_archs(
- get_machine()->$(kern.name)_indices,
- get_machine()->$(kern.name)_arch_defs,
- get_machine()->$(kern.name)_n_archs,
- get_machine()->$(kern.name)_name,
- volk_get_lvarch()
- )];
+#if $kern.has_dispatcher
+#include <volk/$(kern.name).h> //pulls in the dispatcher
+#end if
+
+static inline void __$(kern.name)_d($kern.arglist_full)
+{
+ #if $kern.has_dispatcher
+ $(kern.name)_dispatcher($kern.arglist_names);
+ return;
+ #end if
+
+ if (volk_is_aligned(
+ #set $num_open_parens = 0
+ #for $arg_type, $arg_name in $kern.args
+ #if '*' in $arg_type
+ VOLK_OR_PTR($arg_name,
+ #set $num_open_parens += 1
+ #end if
+ #end for
+ 0$(')'*$num_open_parens)
+ )){
+ $(kern.name)_a($kern.arglist_names);
+ }
+ else{
+ $(kern.name)_u($kern.arglist_names);
+ }
+}
+
+static inline void __init_$(kern.name)(void)
+{
+ const char *name = get_machine()->$(kern.name)_name;
+ const char **impl_names = get_machine()->$(kern.name)_impl_names;
+ const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
+ const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
+ const size_t n_impls = get_machine()->$(kern.name)_n_impls;
+ const size_t index_a = volk_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/);
+ const size_t index_u = volk_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/);
+ $(kern.name)_a = get_machine()->$(kern.name)_impls[index_a];
+ $(kern.name)_u = get_machine()->$(kern.name)_impls[index_u];
+
+ assert($(kern.name)_a);
+ assert($(kern.name)_u);
+
+ $(kern.name) = &__$(kern.name)_d;
+}
+
+static inline void __$(kern.name)_a($kern.arglist_full)
+{
+ __init_$(kern.name)();
+ $(kern.name)_a($kern.arglist_names);
+}
+
+static inline void __$(kern.name)_u($kern.arglist_full)
+{
+ __init_$(kern.name)();
+ $(kern.name)_u($kern.arglist_names);
+}
+
+static inline void __$(kern.name)($kern.arglist_full)
+{
+ __init_$(kern.name)();
$(kern.name)($kern.arglist_names);
}
-$kern.pname $kern.name = &get_$(kern.name);
+$kern.pname $(kern.name)_a = &__$(kern.name)_a;
+$kern.pname $(kern.name)_u = &__$(kern.name)_u;
+$kern.pname $(kern.name) = &__$(kern.name);
-void $(kern.name)_manual($kern.arglist_namedefs, const char* arch) {
- const size_t index = get_index(
- get_machine()->$(kern.name)_indices,
- get_machine()->$(kern.name)_n_archs,
- arch
+void $(kern.name)_manual($kern.arglist_full, const char* impl_name)
+{
+ const int index = volk_get_index(
+ get_machine()->$(kern.name)_impl_names,
+ get_machine()->$(kern.name)_n_impls,
+ impl_name
);
- get_machine()->$(kern.name)_archs[index](
+ get_machine()->$(kern.name)_impls[index](
$kern.arglist_names
);
}
-struct volk_func_desc $(kern.name)_get_func_desc(void) {
- struct volk_func_desc desc = {
- get_machine()->$(kern.name)_indices,
- get_machine()->$(kern.name)_arch_defs,
- get_machine()->$(kern.name)_n_archs
+volk_func_desc_t $(kern.name)_get_func_desc(void) {
+ const char **impl_names = get_machine()->$(kern.name)_impl_names;
+ const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
+ const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
+ const size_t n_impls = get_machine()->$(kern.name)_n_impls;
+ volk_func_desc_t desc = {
+ impl_names,
+ impl_deps,
+ alignment,
+ n_impls
};
return desc;
}
diff --git a/volk/tmpl/volk.tmpl.h b/volk/tmpl/volk.tmpl.h
index 161579e46..464b65598 100644
--- a/volk/tmpl/volk.tmpl.h
+++ b/volk/tmpl/volk.tmpl.h
@@ -27,20 +27,59 @@
#include <volk/volk_common.h>
#include <volk/volk_complex.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
__VOLK_DECL_BEGIN
-struct volk_func_desc {
- const char **indices;
- const int *arch_defs;
- const int n_archs;
-};
+typedef struct volk_func_desc
+{
+ const char **impl_names;
+ const int *impl_deps;
+ const bool *impl_alignment;
+ const size_t n_impls;
+} volk_func_desc_t;
+
+//! Get the machine alignment in bytes
+VOLK_API size_t volk_get_alignment(void);
+
+/*!
+ * The VOLK_OR_PTR macro is a convenience macro
+ * for checking the alignment of a set of pointers.
+ * Example usage:
+ * volk_is_aligned(VOLK_OR_PTR(VOLK_OR_PTR(p0, p1), p2))
+ */
+#define VOLK_OR_PTR(ptr0, ptr1) \
+ (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1)))
-VOLK_API unsigned int volk_get_alignment(void);
+/*!
+ * Is the pointer on a machine alignment boundary?
+ *
+ * Note: for performance reasons, this function
+ * is not usable until another volk API call is made
+ * which will perform certain initialization tasks.
+ *
+ * \param ptr the pointer to some memory buffer
+ * \return 1 for alignment boundary, else 0
+ */
+VOLK_API bool volk_is_aligned(const void *ptr);
#for $kern in $kernels
+
+//! A function pointer to the dispatcher implementation
extern VOLK_API $kern.pname $kern.name;
-extern VOLK_API void $(kern.name)_manual($kern.arglist_namedefs, const char* arch);
-extern VOLK_API struct volk_func_desc $(kern.name)_get_func_desc(void);
+
+//! A function pointer to the fastest aligned implementation
+extern VOLK_API $kern.pname $(kern.name)_a;
+
+//! A function pointer to the fastest unaligned implementation
+extern VOLK_API $kern.pname $(kern.name)_u;
+
+//! Call into a specific implementation given by name
+extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* impl_name);
+
+//! Get description parameters for this kernel
+extern VOLK_API volk_func_desc_t $(kern.name)_get_func_desc(void);
#end for
__VOLK_DECL_END
diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c b/volk/tmpl/volk_machine_xxx.tmpl.c
index e405bd693..68d7f3eba 100644
--- a/volk/tmpl/volk_machine_xxx.tmpl.c
+++ b/volk/tmpl/volk_machine_xxx.tmpl.c
@@ -44,18 +44,23 @@ $(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp
#end def
########################################################################
-#def make_tag_str_list($tags)
-{$(', '.join(['"%s"'%a for a in $tags]))}#slurp
+#def make_impl_name_list($impls)
+{$(', '.join(['"%s"'%i.name for i in $impls]))}#slurp
#end def
########################################################################
-#def make_tag_have_list($deps)
-{$(', '.join([' | '.join(['(1 << LV_%s)'%a.upper() for a in d]) for d in $deps]))}#slurp
+#def make_impl_align_list($impls)
+{$(', '.join(['true' if i.is_aligned else 'false' for i in $impls]))}#slurp
#end def
########################################################################
-#def make_tag_kern_list($name, $tags)
-{$(', '.join(['%s_%s'%($name, a) for a in $tags]))}#slurp
+#def make_impl_deps_list($impls)
+{$(', '.join([' | '.join(['(1 << LV_%s)'%d.upper() for d in i.deps]) for i in $impls]))}#slurp
+#end def
+
+########################################################################
+#def make_impl_fcn_list($name, $impls)
+{$(', '.join(['%s_%s'%($name, i.name) for i in $impls]))}#slurp
#end def
struct volk_machine volk_machine_$(this_machine.name) = {
@@ -63,11 +68,12 @@ struct volk_machine volk_machine_$(this_machine.name) = {
"$this_machine.name",
$this_machine.alignment,
#for $kern in $kernels
- #set $taglist, $tagdeps = $kern.get_tags($arch_names)
- "$kern.name",
- $make_tag_str_list($taglist),
- $make_tag_have_list($tagdeps),
- $make_tag_kern_list($kern.name, $taglist),
- $(len($taglist)),
+ #set $impls = $kern.get_impls($arch_names)
+ "$kern.name", ##//kernel name
+ $make_impl_name_list($impls), ##//list of kernel implementations by name
+ $make_impl_deps_list($impls), ##//list of arch dependencies per implementation
+ $make_impl_align_list($impls), ##//alignment required? for each implementation
+ $make_impl_fcn_list($kern.name, $impls), ##//pointer to each implementation
+ $(len($impls)), ##//number of implementations listed here
#end for
};
diff --git a/volk/tmpl/volk_machines.tmpl.h b/volk/tmpl/volk_machines.tmpl.h
index b30e600ed..7e11b1079 100644
--- a/volk/tmpl/volk_machines.tmpl.h
+++ b/volk/tmpl/volk_machines.tmpl.h
@@ -25,18 +25,22 @@
#include <volk/volk_common.h>
#include <volk/volk_typedefs.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
__VOLK_DECL_BEGIN
struct volk_machine {
const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_get_lvarch format)
const char *name;
- const unsigned int alignment; //the maximum byte alignment required for functions in this library
+ const size_t alignment; //the maximum byte alignment required for functions in this library
#for $kern in $kernels
const char *$(kern.name)_name;
- const char *$(kern.name)_indices[$(len($archs))];
- const int $(kern.name)_arch_defs[$(len($archs))];
- const $(kern.pname) $(kern.name)_archs[$(len($archs))];
- const int $(kern.name)_n_archs;
+ const char *$(kern.name)_impl_names[$(len($archs))];
+ const int $(kern.name)_impl_deps[$(len($archs))];
+ const bool $(kern.name)_impl_alignment[$(len($archs))];
+ const $(kern.pname) $(kern.name)_impls[$(len($archs))];
+ const size_t $(kern.name)_n_impls;
#end for
};
diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h
index 52a87242f..6f5426965 100644
--- a/volk/tmpl/volk_typedefs.tmpl.h
+++ b/volk/tmpl/volk_typedefs.tmpl.h
@@ -26,7 +26,7 @@
#include <volk/volk_complex.h>
#for $kern in $kernels
-typedef $kern.rettype (*$(kern.pname))($kern.arglist_defs);
+typedef void (*$(kern.pname))($kern.arglist_types);
#end for
#endif /*INCLUDED_VOLK_TYPEDEFS*/