diff options
Diffstat (limited to 'volk')
-rw-r--r-- | volk/gen/archs.xml | 183 | ||||
-rw-r--r-- | volk/gen/volk_arch_defs.py | 31 | ||||
-rw-r--r-- | volk/tmpl/volk_cpu.tmpl.c | 96 |
3 files changed, 138 insertions, 172 deletions
diff --git a/volk/gen/archs.xml b/volk/gen/archs.xml index 2b0e8e508..134dfa2d9 100644 --- a/volk/gen/archs.xml +++ b/volk/gen/archs.xml @@ -1,161 +1,168 @@ <!-- archs appear in order of significance for blind, de-facto version ordering --> <grammar> -<arch name="generic" type="all"> <!-- name and type are both required--> - <flag>none</flag> <!-- flag is the only required field--> +<arch name="generic"> <!-- name is required--> </arch> -<arch name="altivec" type="powerpc"> +<arch name="altivec"> <flag>maltivec</flag> <alignment>16</alignment> + <check name="has_ppc"></check> </arch> -<arch name="neon" type="arm"> - <flag>mfpu=neon,mfloat-abi=softfp,funsafe-math-optimizations</flag> +<arch name="neon"> + <flag>mfpu=neon</flag> + <flag>mfloat-abi=softfp</flag> + <flag>funsafe-math-optimizations</flag> <alignment>16</alignment> + <check name="has_neon"></check> </arch> -<arch name="32" type="x86" no_test="true" > +<arch name="32"> <flag>m32</flag> - <overrule>MD_SUBCPU</overrule> - <overrule_val>x86_64</overrule_val> </arch> -<arch name="64" type="x86"> - <op>0x80000001</op> - <reg>d</reg> - <shift>29</shift> +<arch name="64"> + <check name="check_extended_cpuid"> + <param>0x80000001</param> + </check> + <check name="cpuid_x86_bit"> <!-- checks to see if a bit is set --> + <param>3</param> <!-- eax, ebx, ecx, [edx] --> + <param>0x80000001</param> <!-- cpuid operation --> + <param>29</param> <!-- bit shift --> + </check> <flag>m64</flag> - <val>1</val> - <overrule>MD_SUBCPU</overrule> - <overrule_val>x86</overrule_val> </arch> -<arch name="3dnow" type="x86"> - <op>0x80000001</op> - <reg>d</reg> - <shift>31</shift> +<arch name="3dnow"> + <check name="cpuid_x86_bit"> + <param>3</param> + <param>0x80000001</param> + <param>31</param> + </check> <flag>m3dnow</flag> - <val>1</val> <alignment>8</alignment> </arch> -<arch name="abm" type="x86"> - <val>1</val> - <op>0x80000001</op> - <reg>d</reg> - <shift>5</shift> +<arch name="abm"> + <check name="cpuid_x86_bit"> + <param>3</param> + <param>0x80000001</param> + <param>5</param> + </check> <flag>msse4.2</flag> <alignment>16</alignment> </arch> -<arch name="popcount" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>23</shift> +<arch name="popcount"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x00000001</param> + <param>23</param> + </check> <flag>mpopcnt</flag> </arch> -<arch name="mmx" type="x86"> - <val>1</val> - <op>1</op> - <reg>d</reg> - <shift>23</shift> +<arch name="mmx"> + <check name="cpuid_x86_bit"> + <param>3</param> + <param>0x00000001</param> + <param>23</param> + </check> <flag>mmmx</flag> <alignment>8</alignment> </arch> - -<arch name="sse" type="x86"> - <val>1</val> - <op>1</op> - <reg>d</reg> - <shift>25</shift> +<arch name="sse"> + <check name="cpuid_x86_bit"> + <param>3</param> + <param>0x00000001</param> + <param>25</param> + </check> <flag>msse</flag> <environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment> <include>xmmintrin.h</include> <alignment>16</alignment> </arch> - -<arch name="sse2" type="x86"> - <val>1</val> - <op>1</op> - <reg>d</reg> - <shift>26</shift> +<arch name="sse2"> + <check name="cpuid_x86_bit"> + <param>3</param> + <param>0x00000001</param> + <param>26</param> + </check> <flag>msse2</flag> <alignment>16</alignment> </arch> -<arch name="orc" type="all"> - <flag>none</flag> - <overrule>LV_HAVE_ORC</overrule> - <overrule_val>no</overrule_val> +<arch name="orc"> </arch> -<arch name="norc" type="all"> - <flag>none</flag> - <overrule>LV_HAVE_ORC</overrule> - <overrule_val>no</overrule_val> +<!-- it's here for overrule stuff. --> +<arch name="norc"> </arch> -<arch name="sse3" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>0</shift> +<arch name="sse3"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x00000001</param> + <param>0</param> + </check> <flag>msse3</flag> <environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment> <include>pmmintrin.h</include> <alignment>16</alignment> </arch> -<arch name="ssse3" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>9</shift> +<arch name="ssse3"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x00000001</param> + <param>9</param> + </check> <flag>mssse3</flag> <alignment>16</alignment> </arch> -<arch name="sse4_a" type="x86"> - <val>1</val> - <op>0x80000001</op> - <reg>c</reg> - <shift>6</shift> +<arch name="sse4_a"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x80000001</param> + <param>6</param> + </check> <flag>msse4a</flag> <alignment>16</alignment> </arch> - -<arch name="sse4_1" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>19</shift> +<arch name="sse4_1"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x00000001</param> + <param>19</param> + </check> <flag>msse4.1</flag> <alignment>16</alignment> </arch> -<arch name="sse4_2" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>20</shift> +<arch name="sse4_2"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x00000001</param> + <param>20</param> + </check> <flag>msse4.2</flag> <alignment>16</alignment> </arch> -<arch name="avx" type="x86"> - <val>1</val> - <op>1</op> - <reg>c</reg> - <shift>28</shift> +<arch name="avx"> + <check name="cpuid_x86_bit"> + <param>2</param> + <param>0x00000001</param> + <param>28</param> + </check> + <!-- check to see that the OS has enabled AVX --> + <check name="get_avx_enabled"></check> <flag>mavx</flag> - <check>xgetbv</check> - <checkval>7</checkval> <alignment>32</alignment> </arch> diff --git a/volk/gen/volk_arch_defs.py b/volk/gen/volk_arch_defs.py index fd81eed8c..d64f8def2 100644 --- a/volk/gen/volk_arch_defs.py +++ b/volk/gen/volk_arch_defs.py @@ -22,28 +22,18 @@ arch_dict = dict() create_unaligned_archs = False class arch_class: - def __init__(self, **kwargs): + def __init__(self, flags, checks, **kwargs): for key, cast, failval in ( ('name', str, None), - ('type', str, None), - ('no_test', bool, False), - ('val', int, None), - ('op', eval, None), - ('reg', str, None), - ('shift', int, None), - ('flag', str, None), ('environment', str, None), ('include', str, None), - ('alignment', int, 1), - ('check', str, None), + ('alignment', int, 1) ): try: setattr(self, key, cast(kwargs[key])) except: setattr(self, key, failval) + self.checks = checks assert(self.name) - assert(self.type) - if self.flag == 'none': self.flag = None - self.flags = list() - if self.flag: self.flags = map(str.strip, self.flag.split(',')) + self.flags = flags def __repr__(self): return self.name @@ -74,7 +64,18 @@ for arch_xml in archs_xml: val = arch_xml.getElementsByTagName(name)[0].firstChild.data kwargs[name] = val except: pass - register_arch(**kwargs) + checks = [] + for check_xml in arch_xml.getElementsByTagName("check"): + name = check_xml.attributes["name"].value + params = list() + for param_xml in check_xml.getElementsByTagName("param"): + params.append(param_xml.firstChild.data) + checks.append([name, params]) + flags = [] + for flag_xml in arch_xml.getElementsByTagName("flag"): + flags.append(flag_xml.firstChild.data) + + register_arch(flags=flags, checks=checks, **kwargs) if __name__ == '__main__': print archs diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c index 1bd1ad211..b050d8aea 100644 --- a/volk/tmpl/volk_cpu.tmpl.c +++ b/volk/tmpl/volk_cpu.tmpl.c @@ -64,40 +64,39 @@ struct VOLK_CPU volk_cpu; #else #error "A get cpuid for volk is not available on this compiler..." -#endif +#endif //defined(__GNUC__) -static inline unsigned int cpuid_eax(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[0]; -} +#endif //defined(VOLK_CPU_x86) -static inline unsigned int cpuid_ebx(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[1]; -} - -static inline unsigned int cpuid_ecx(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[2]; +static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) { +#if defined(VOLK_CPU_x86) + unsigned int regs[4]; + cpuid_x86(op, regs); + return regs[reg] >> bit & 0x01; +#else + return 0; +#endif } -static inline unsigned int cpuid_edx(unsigned int op) { - int regs[4]; - cpuid_x86 (op, regs); - return regs[3]; +static inline unsigned int check_extended_cpuid(unsigned int val) { +#if defined(VOLK_CPU_x86) + unsigned int regs[4]; + cpuid_x86(0x80000000, regs); + return regs[0] >= val; +#else + return 0; +#endif } -static inline unsigned int xgetbv(void) { +static inline unsigned int get_avx_enabled(void) { +#if defined(VOLK_CPU_x86) //check to make sure that xgetbv is enabled in OS - int xgetbv_enabled = cpuid_ecx(1) >> 27 & 0x01; - if (xgetbv_enabled == 0) return 0; + if(!cpuid_x86_bit(2, 1, 27)) return 0; return __xgetbv() & 0x6; -} - +#else + return 0; #endif +} //neon detection is linux specific #if defined(__arm__) && defined(__linux__) @@ -140,51 +139,10 @@ static int has_ppc(void){ #for $arch in $archs static int i_can_has_$arch.name (void) { -######################################################################## - #if $arch.type == "x86" and $arch.no_test -#if defined(VOLK_CPU_x86) - return 1; -#else - return 0; -#endif -######################################################################## - #else if $arch.op == 1 -#if defined(VOLK_CPU_x86) - #set $op = hex($arch.op) - unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op); - unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val; - #if $arch.check - if ($(arch.check)() == 0) return 0; - #end if - return hwcap; -#else - return 0; -#endif -######################################################################## - #else if $arch.op == 0x80000001 -#if defined(VOLK_CPU_x86) - #set $op = hex($arch.op) - unsigned int extended_fct_count = cpuid_eax(0x80000000); - if (extended_fct_count < 0x80000001) - return $(arch.val)^1; - unsigned int extended_features = cpuid_e$(arch.reg)x ($op); - return ((extended_features >> $arch.shift) & 1) == $arch.val; -#else - return 0; -#endif -######################################################################## - #else if $arch.type == "powerpc" - return has_ppc(); -######################################################################## - #else if $arch.type == "arm" - return has_neon(); -######################################################################## - #else if $arch.type == "all" + #for $check, $params in $arch.checks + if ($(check)($(', '.join($params))) == 0) return 0; + #end for return 1; -######################################################################## - #else ##$ - return 0; - #end if } #end for |