summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--volk/gen/volk_arch_defs.py1
-rw-r--r--volk/gen/volk_tmpl_utils.py2
-rw-r--r--volk/lib/gcc_x86_cpuid.h6
-rw-r--r--volk/tmpl/volk_cpu.tmpl.c33
4 files changed, 36 insertions, 6 deletions
diff --git a/volk/gen/volk_arch_defs.py b/volk/gen/volk_arch_defs.py
index 4f4796840..fd81eed8c 100644
--- a/volk/gen/volk_arch_defs.py
+++ b/volk/gen/volk_arch_defs.py
@@ -35,6 +35,7 @@ class arch_class:
('environment', str, None),
('include', str, None),
('alignment', int, 1),
+ ('check', str, None),
):
try: setattr(self, key, cast(kwargs[key]))
except: setattr(self, key, failval)
diff --git a/volk/gen/volk_tmpl_utils.py b/volk/gen/volk_tmpl_utils.py
index 9d7a0d0e5..c215e389e 100644
--- a/volk/gen/volk_tmpl_utils.py
+++ b/volk/gen/volk_tmpl_utils.py
@@ -34,7 +34,7 @@ def __escape_pre_processor(code):
m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
if m:
p0, p1, fcn, stuff = m.groups()
- conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif')
+ conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif', 'pragma')
both = fcn in ('if', 'else')
istmpl = '$' in stuff
if 'defined' in stuff: istmpl = False
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
index e0254f192..3c3f47b00 100644
--- a/volk/lib/gcc_x86_cpuid.h
+++ b/volk/lib/gcc_x86_cpuid.h
@@ -5,16 +5,16 @@
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option) any
* later version.
- *
+ *
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
- *
+ *
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c
index ff27a7f96..58b2cfbd2 100644
--- a/volk/tmpl/volk_cpu.tmpl.c
+++ b/volk/tmpl/volk_cpu.tmpl.c
@@ -40,11 +40,28 @@ struct VOLK_CPU volk_cpu;
#endif
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
+ /* Read XCR0 (XFEATURE_ENABLED_MASK) via xgetbv and return its low 32 bits.
+ * The instruction faults when the OS has not enabled XSAVE, so callers
+ * must check CPUID.1:ECX.OSXSAVE (bit 27) before executing this.
+ */
+ static inline unsigned int __xgetbv(void)
+ {
+ unsigned int index = 0, __eax, __edx; /* ECX must be 0 to select XCR0; it was previously left uninitialized */
+ __asm__ ("xgetbv" : "=a"(__eax), "=d"(__edx) : "c" (index));
+ return __eax;
+ }
+
//implement get cpuid for MSVC compilers using __cpuid intrinsic
#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
#include <intrin.h>
#define cpuid_x86(op, r) __cpuid(r, op)
+ #if defined(_XCR_XFEATURE_ENABLED_MASK)
+ #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
+ #else
+ #define __xgetbv() 0
+ #endif
+
#else
#error "A get cpuid for volk is not available on this compiler..."
#endif
@@ -72,6 +89,14 @@ static inline unsigned int cpuid_edx(unsigned int op) {
cpuid_x86 (op, regs);
return regs[3];
}
+
+static inline unsigned int xgetbv(void) {
+ //CPUID.1:ECX bit 27 (OSXSAVE) must be set, otherwise executing xgetbv faults; returns 0x6 if AVX state is OS-enabled, else 0
+ int xgetbv_enabled = cpuid_ecx(1) >> 27 & 0x01;
+ if (xgetbv_enabled == 0) return 0;
+ return ((__xgetbv() & 0x6) == 0x6) ? 0x6 : 0; //AVX needs both XMM (bit 1) and YMM (bit 2) state enabled, not just one
+}
+
#endif
//neon detection is linux specific
@@ -114,7 +139,7 @@ static int has_ppc(void){
}
#for $arch in $archs
-static int i_can_has_$arch.name () {
+static int i_can_has_$arch.name (void) {
########################################################################
#if $arch.type == "x86" and $arch.no_test
#if defined(VOLK_CPU_x86)
@@ -127,7 +152,11 @@ static int i_can_has_$arch.name () {
#if defined(VOLK_CPU_x86)
#set $op = hex($arch.op)
unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op);
- return ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val;
+ unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val;
+ #if $arch.check
+ if ($(arch.check)() == 0) return 0;
+ #end if
+ return hwcap;
#else
return 0;
#endif