summaryrefslogtreecommitdiff
path: root/volk/tmpl/volk_cpu.tmpl.c
diff options
context:
space:
mode:
Diffstat (limited to 'volk/tmpl/volk_cpu.tmpl.c')
-rw-r--r--volk/tmpl/volk_cpu.tmpl.c33
1 files changed, 31 insertions, 2 deletions
diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c
index ff27a7f96..58b2cfbd2 100644
--- a/volk/tmpl/volk_cpu.tmpl.c
+++ b/volk/tmpl/volk_cpu.tmpl.c
@@ -40,11 +40,28 @@ struct VOLK_CPU volk_cpu;
#endif
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
+ /* Return Intel AVX extended CPU capabilities register.
+ * This function will bomb on non-AVX-capable machines, so
+ * check for AVX capability before executing.
+ */
+ static inline unsigned int __xgetbv(void)
+ {
+ unsigned int index, __eax, __edx;
+ __asm__ ("xgetbv" : "=a"(__eax), "=d"(__edx) : "c" (index));
+ return __eax;
+ }
+
//implement get cpuid for MSVC compilers using __cpuid intrinsic
#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
#include <intrin.h>
#define cpuid_x86(op, r) __cpuid(r, op)
+ #if defined(_XCR_XFEATURE_ENABLED_MASK)
+ #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
+ #else
+ #define __xgetbv() 0
+ #endif
+
#else
#error "A get cpuid for volk is not available on this compiler..."
#endif
@@ -72,6 +89,14 @@ static inline unsigned int cpuid_edx(unsigned int op) {
cpuid_x86 (op, regs);
return regs[3];
}
+
+static inline unsigned int xgetbv(void) {
+ //check to make sure that xgetbv is enabled in OS
+ int xgetbv_enabled = cpuid_ecx(1) >> 27 & 0x01;
+ if (xgetbv_enabled == 0) return 0;
+ return __xgetbv() & 0x6;
+}
+
#endif
//neon detection is linux specific
@@ -114,7 +139,7 @@ static int has_ppc(void){
}
#for $arch in $archs
-static int i_can_has_$arch.name () {
+static int i_can_has_$arch.name (void) {
########################################################################
#if $arch.type == "x86" and $arch.no_test
#if defined(VOLK_CPU_x86)
@@ -127,7 +152,11 @@ static int i_can_has_$arch.name () {
#if defined(VOLK_CPU_x86)
#set $op = hex($arch.op)
unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op);
- return ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val;
+ unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val;
+ #if $arch.check
+ if ($(arch.check)() == 0) return 0;
+ #end if
+ return hwcap;
#else
return 0;
#endif