diff options
Diffstat (limited to 'volk/kernels/README.txt')
-rw-r--r-- | volk/kernels/README.txt | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/volk/kernels/README.txt b/volk/kernels/README.txt new file mode 100644 index 000000000..5dd7434b5 --- /dev/null +++ b/volk/kernels/README.txt @@ -0,0 +1,67 @@ +######################################################################## +# How to create custom kernel dispatchers +######################################################################## +A kernel dispatcher is a kernel implementation that calls other kernel implementations. +By default, a dispatcher is generated by the build system for every kernel such that: + * the best aligned implementation is called when all pointer arguments are aligned, + * and otherwise the best unaligned implementation is called. + +The author of a VOLK kernel may create a custom dispatcher, +to be called in place of the automatically generated one. +A custom dispatcher may be useful to handle head and tail cases, +or to implement different alignment and bounds checking logic. + +######################################################################## +# Code for an example dispatcher w/ tail case +######################################################################## +#include <volk/volk_common.h> + +#ifdef LV_HAVE_DISPATCHER + +static inline void volk_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points) +{ + const unsigned int num_points_r = num_points%4; + const unsigned int num_points_x = num_points - num_points_r; + + if (volk_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector)))) + { + volk_32f_x2_add_32f_a(cVector, aVector, bVector, num_points_x); + } + else + { + volk_32f_x2_add_32f_u(cVector, aVector, bVector, num_points_x); + } + + volk_32f_x2_add_32f_g(cVector+num_points_x, aVector+num_points_x, bVector+num_points_x, num_points_r); +} + +#endif //LV_HAVE_DISPATCHER + +######################################################################## +# Code for an example dispatcher w/ tail case and accumulator 
+######################################################################## +#include <volk/volk_common.h> + +#ifdef LV_HAVE_DISPATCHER + +static inline void volk_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points) +{ + const unsigned int num_points_r = num_points%16; + const unsigned int num_points_x = num_points - num_points_r; + + if (volk_is_aligned(VOLK_OR_PTR(input, taps))) + { + volk_32f_x2_dot_prod_32f_a(result, input, taps, num_points_x); + } + else + { + volk_32f_x2_dot_prod_32f_u(result, input, taps, num_points_x); + } + + float result_tail = 0; + volk_32f_x2_dot_prod_32f_g(&result_tail, input+num_points_x, taps+num_points_x, num_points_r); + + *result += result_tail; +} + +#endif //LV_HAVE_DISPATCHER |