diff options
-rw-r--r-- | volk/gen/volk_arch_defs.py | 1 | ||||
-rw-r--r-- | volk/gen/volk_tmpl_utils.py | 2 | ||||
-rw-r--r-- | volk/lib/gcc_x86_cpuid.h | 6 | ||||
-rw-r--r-- | volk/tmpl/volk_cpu.tmpl.c | 33 |
4 files changed, 36 insertions, 6 deletions
diff --git a/volk/gen/volk_arch_defs.py b/volk/gen/volk_arch_defs.py index 4f4796840..fd81eed8c 100644 --- a/volk/gen/volk_arch_defs.py +++ b/volk/gen/volk_arch_defs.py @@ -35,6 +35,7 @@ class arch_class: ('environment', str, None), ('include', str, None), ('alignment', int, 1), + ('check', str, None), ): try: setattr(self, key, cast(kwargs[key])) except: setattr(self, key, failval) diff --git a/volk/gen/volk_tmpl_utils.py b/volk/gen/volk_tmpl_utils.py index 9d7a0d0e5..c215e389e 100644 --- a/volk/gen/volk_tmpl_utils.py +++ b/volk/gen/volk_tmpl_utils.py @@ -34,7 +34,7 @@ def __escape_pre_processor(code): m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line) if m: p0, p1, fcn, stuff = m.groups() - conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif') + conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif', 'pragma') both = fcn in ('if', 'else') istmpl = '$' in stuff if 'defined' in stuff: istmpl = False diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h index e0254f192..3c3f47b00 100644 --- a/volk/lib/gcc_x86_cpuid.h +++ b/volk/lib/gcc_x86_cpuid.h @@ -5,16 +5,16 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 3, or (at your option) any * later version. - * + * * This file is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * + * * Under Section 7 of GPL version 3, you are granted additional * permissions described in the GCC Runtime Library Exception, version * 3.1, as published by the Free Software Foundation. - * + * * You should have received a copy of the GNU General Public License and * a copy of the GCC Runtime Library Exception along with this program; * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index ff27a7f96..58b2cfbd2 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -40,11 +40,28 @@ struct VOLK_CPU volk_cpu; #endif #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) + /* Return the low 32 bits of XCR0, the XFEATURE_ENABLED_MASK register. + * XGETBV faults unless the OS has set OSXSAVE (CPUID.1:ECX bit 27), so + * check for that capability before executing. + */ + static inline unsigned int __xgetbv(void) + { + unsigned int index = 0, __eax, __edx; /* ECX = 0 selects XCR0; it must not be left uninitialized */ + __asm__ ("xgetbv" : "=a"(__eax), "=d"(__edx) : "c" (index)); + return __eax; + } + //implement get cpuid for MSVC compilers using __cpuid intrinsic #elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) #include <intrin.h> #define cpuid_x86(op, r) __cpuid(r, op) + #if defined(_XCR_XFEATURE_ENABLED_MASK) + #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) + #else + #define __xgetbv() 0 + #endif + #else #error "A get cpuid for volk is not available on this compiler..." 
#endif @@ -72,6 +89,14 @@ static inline unsigned int cpuid_edx(unsigned int op) { cpuid_x86 (op, regs); return regs[3]; } + +static inline unsigned int xgetbv(void) { + /* Return 0x6 only when the OS has set OSXSAVE (CPUID.1:ECX bit 27) and XCR0 enables both XMM (bit 1) and YMM (bit 2) state; otherwise return 0. */ + int xgetbv_enabled = (cpuid_ecx(1) >> 27) & 0x01; + if (xgetbv_enabled == 0) return 0; + return ((__xgetbv() & 0x6) == 0x6) ? 0x6 : 0; +} + #endif //neon detection is linux specific @@ -114,7 +139,7 @@ static int has_ppc(void){ } #for $arch in $archs -static int i_can_has_$arch.name () { +static int i_can_has_$arch.name (void) { ######################################################################## #if $arch.type == "x86" and $arch.no_test #if defined(VOLK_CPU_x86) @@ -127,7 +152,11 @@ static int i_can_has_$arch.name () { #if defined(VOLK_CPU_x86) #set $op = hex($arch.op) unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op); - return ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; + unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; + #if $arch.check + if ($(arch.check)() == 0) return 0; + #end if + return hwcap; #else return 0; #endif |