summaryrefslogtreecommitdiff
path: root/volk
diff options
context:
space:
mode:
Diffstat (limited to 'volk')
-rw-r--r--volk/gen/archs.xml183
-rw-r--r--volk/gen/volk_arch_defs.py31
-rw-r--r--volk/tmpl/volk_cpu.tmpl.c96
3 files changed, 138 insertions, 172 deletions
diff --git a/volk/gen/archs.xml b/volk/gen/archs.xml
index 2b0e8e508..134dfa2d9 100644
--- a/volk/gen/archs.xml
+++ b/volk/gen/archs.xml
@@ -1,161 +1,168 @@
<!-- archs appear in order of significance for blind, de-facto version ordering -->
<grammar>
-<arch name="generic" type="all"> <!-- name and type are both required-->
- <flag>none</flag> <!-- flag is the only required field-->
+<arch name="generic"> <!-- name is required-->
</arch>
-<arch name="altivec" type="powerpc">
+<arch name="altivec">
<flag>maltivec</flag>
<alignment>16</alignment>
+ <check name="has_ppc"></check>
</arch>
-<arch name="neon" type="arm">
- <flag>mfpu=neon,mfloat-abi=softfp,funsafe-math-optimizations</flag>
+<arch name="neon">
+ <flag>mfpu=neon</flag>
+ <flag>mfloat-abi=softfp</flag>
+ <flag>funsafe-math-optimizations</flag>
<alignment>16</alignment>
+ <check name="has_neon"></check>
</arch>
-<arch name="32" type="x86" no_test="true" >
+<arch name="32">
<flag>m32</flag>
- <overrule>MD_SUBCPU</overrule>
- <overrule_val>x86_64</overrule_val>
</arch>
-<arch name="64" type="x86">
- <op>0x80000001</op>
- <reg>d</reg>
- <shift>29</shift>
+<arch name="64">
+ <check name="check_extended_cpuid">
+ <param>0x80000001</param>
+ </check>
+ <check name="cpuid_x86_bit"> <!-- checks to see if a bit is set -->
+ <param>3</param> <!-- eax, ebx, ecx, [edx] -->
+ <param>0x80000001</param> <!-- cpuid operation -->
+ <param>29</param> <!-- bit shift -->
+ </check>
<flag>m64</flag>
- <val>1</val>
- <overrule>MD_SUBCPU</overrule>
- <overrule_val>x86</overrule_val>
</arch>
-<arch name="3dnow" type="x86">
- <op>0x80000001</op>
- <reg>d</reg>
- <shift>31</shift>
+<arch name="3dnow">
+ <check name="check_extended_cpuid"> <!-- leaf 0x80000001 must exist before its bits are read -->
+ <param>0x80000001</param>
+ </check>
+ <check name="cpuid_x86_bit">
+ <param>3</param> <!-- eax, ebx, ecx, [edx] -->
+ <param>0x80000001</param> <!-- cpuid operation -->
+ <param>31</param> <!-- bit shift -->
+ </check>
<flag>m3dnow</flag>
- <val>1</val>
<alignment>8</alignment>
</arch>
-<arch name="abm" type="x86">
- <val>1</val>
- <op>0x80000001</op>
- <reg>d</reg>
- <shift>5</shift>
+<arch name="abm">
+ <check name="check_extended_cpuid"> <!-- leaf 0x80000001 must exist before its bits are read -->
+ <param>0x80000001</param>
+ </check>
+ <check name="cpuid_x86_bit">
+ <param>2</param> <!-- eax, ebx, [ecx], edx: ABM/LZCNT is reported in ecx, not edx -->
+ <param>0x80000001</param> <!-- cpuid operation -->
+ <param>5</param> <!-- bit shift -->
+ </check>
<flag>msse4.2</flag>
<alignment>16</alignment>
</arch>
-<arch name="popcount" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>c</reg>
- <shift>23</shift>
+<arch name="popcount">
+ <check name="cpuid_x86_bit">
+ <param>2</param>
+ <param>0x00000001</param>
+ <param>23</param>
+ </check>
<flag>mpopcnt</flag>
</arch>
-<arch name="mmx" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>d</reg>
- <shift>23</shift>
+<arch name="mmx">
+ <check name="cpuid_x86_bit">
+ <param>3</param>
+ <param>0x00000001</param>
+ <param>23</param>
+ </check>
<flag>mmmx</flag>
<alignment>8</alignment>
</arch>
-
-<arch name="sse" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>d</reg>
- <shift>25</shift>
+<arch name="sse">
+ <check name="cpuid_x86_bit">
+ <param>3</param>
+ <param>0x00000001</param>
+ <param>25</param>
+ </check>
<flag>msse</flag>
<environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
<include>xmmintrin.h</include>
<alignment>16</alignment>
</arch>
-
-<arch name="sse2" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>d</reg>
- <shift>26</shift>
+<arch name="sse2">
+ <check name="cpuid_x86_bit">
+ <param>3</param>
+ <param>0x00000001</param>
+ <param>26</param>
+ </check>
<flag>msse2</flag>
<alignment>16</alignment>
</arch>
-<arch name="orc" type="all">
- <flag>none</flag>
- <overrule>LV_HAVE_ORC</overrule>
- <overrule_val>no</overrule_val>
+<arch name="orc">
</arch>
-<arch name="norc" type="all">
- <flag>none</flag>
- <overrule>LV_HAVE_ORC</overrule>
- <overrule_val>no</overrule_val>
+<!-- "norc" has no flags or checks of its own; it exists only so the overrule mechanism has an arch to refer to. -->
+<arch name="norc">
</arch>
-<arch name="sse3" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>c</reg>
- <shift>0</shift>
+<arch name="sse3">
+ <check name="cpuid_x86_bit">
+ <param>2</param>
+ <param>0x00000001</param>
+ <param>0</param>
+ </check>
<flag>msse3</flag>
<environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment>
<include>pmmintrin.h</include>
<alignment>16</alignment>
</arch>
-<arch name="ssse3" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>c</reg>
- <shift>9</shift>
+<arch name="ssse3">
+ <check name="cpuid_x86_bit">
+ <param>2</param>
+ <param>0x00000001</param>
+ <param>9</param>
+ </check>
<flag>mssse3</flag>
<alignment>16</alignment>
</arch>
-<arch name="sse4_a" type="x86">
- <val>1</val>
- <op>0x80000001</op>
- <reg>c</reg>
- <shift>6</shift>
+<arch name="sse4_a">
+ <check name="check_extended_cpuid"> <!-- leaf 0x80000001 must exist before its bits are read -->
+ <param>0x80000001</param>
+ </check>
+ <check name="cpuid_x86_bit">
+ <param>2</param> <!-- eax, ebx, [ecx], edx -->
+ <param>0x80000001</param> <!-- cpuid operation -->
+ <param>6</param> <!-- bit shift -->
+ </check>
<flag>msse4a</flag>
<alignment>16</alignment>
</arch>
-
-<arch name="sse4_1" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>c</reg>
- <shift>19</shift>
+<arch name="sse4_1">
+ <check name="cpuid_x86_bit">
+ <param>2</param>
+ <param>0x00000001</param>
+ <param>19</param>
+ </check>
<flag>msse4.1</flag>
<alignment>16</alignment>
</arch>
-<arch name="sse4_2" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>c</reg>
- <shift>20</shift>
+<arch name="sse4_2">
+ <check name="cpuid_x86_bit">
+ <param>2</param>
+ <param>0x00000001</param>
+ <param>20</param>
+ </check>
<flag>msse4.2</flag>
<alignment>16</alignment>
</arch>
-<arch name="avx" type="x86">
- <val>1</val>
- <op>1</op>
- <reg>c</reg>
- <shift>28</shift>
+<arch name="avx">
+ <check name="cpuid_x86_bit">
+ <param>2</param>
+ <param>0x00000001</param>
+ <param>28</param>
+ </check>
+ <!-- check to see that the OS has enabled AVX -->
+ <check name="get_avx_enabled"></check>
<flag>mavx</flag>
- <check>xgetbv</check>
- <checkval>7</checkval>
<alignment>32</alignment>
</arch>
diff --git a/volk/gen/volk_arch_defs.py b/volk/gen/volk_arch_defs.py
index fd81eed8c..d64f8def2 100644
--- a/volk/gen/volk_arch_defs.py
+++ b/volk/gen/volk_arch_defs.py
@@ -22,28 +22,18 @@ arch_dict = dict()
create_unaligned_archs = False
class arch_class:
- def __init__(self, **kwargs):
+ def __init__(self, flags, checks, **kwargs):
for key, cast, failval in (
('name', str, None),
- ('type', str, None),
- ('no_test', bool, False),
- ('val', int, None),
- ('op', eval, None),
- ('reg', str, None),
- ('shift', int, None),
- ('flag', str, None),
('environment', str, None),
('include', str, None),
- ('alignment', int, 1),
- ('check', str, None),
+ ('alignment', int, 1)
):
try: setattr(self, key, cast(kwargs[key]))
except: setattr(self, key, failval)
+ self.checks = checks
assert(self.name)
- assert(self.type)
- if self.flag == 'none': self.flag = None
- self.flags = list()
- if self.flag: self.flags = map(str.strip, self.flag.split(','))
+ self.flags = flags
def __repr__(self): return self.name
@@ -74,7 +64,18 @@ for arch_xml in archs_xml:
val = arch_xml.getElementsByTagName(name)[0].firstChild.data
kwargs[name] = val
except: pass
- register_arch(**kwargs)
+ checks = []
+ for check_xml in arch_xml.getElementsByTagName("check"):
+ name = check_xml.attributes["name"].value
+ params = list()
+ for param_xml in check_xml.getElementsByTagName("param"):
+ params.append(param_xml.firstChild.data)
+ checks.append([name, params])
+ flags = []
+ for flag_xml in arch_xml.getElementsByTagName("flag"):
+ flags.append(flag_xml.firstChild.data)
+
+ register_arch(flags=flags, checks=checks, **kwargs)
if __name__ == '__main__':
print archs
diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c
index 1bd1ad211..b050d8aea 100644
--- a/volk/tmpl/volk_cpu.tmpl.c
+++ b/volk/tmpl/volk_cpu.tmpl.c
@@ -64,40 +64,39 @@ struct VOLK_CPU volk_cpu;
#else
#error "A get cpuid for volk is not available on this compiler..."
-#endif
+#endif //defined(__GNUC__)
-static inline unsigned int cpuid_eax(unsigned int op) {
- int regs[4];
- cpuid_x86 (op, regs);
- return regs[0];
-}
+#endif //defined(VOLK_CPU_x86)
-static inline unsigned int cpuid_ebx(unsigned int op) {
- int regs[4];
- cpuid_x86 (op, regs);
- return regs[1];
-}
-
-static inline unsigned int cpuid_ecx(unsigned int op) {
- int regs[4];
- cpuid_x86 (op, regs);
- return regs[2];
+/*
+ * Returns one bit of a cpuid output register.
+ *   reg: index of the register to read: 0=eax, 1=ebx, 2=ecx, 3=edx
+ *   op:  cpuid operation (leaf) to execute
+ *   bit: position of the bit inside that register (0..31)
+ * Returns 0 or 1; always 0 on non-x86 builds and for out-of-range reg/bit.
+ */
+static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
+#if defined(VOLK_CPU_x86)
+ unsigned int regs[4];
+ //reg > 3 would read past regs[]; bit > 31 would shift by >= the register width
+ if (reg > 3 || bit > 31) return 0;
+ cpuid_x86(op, regs);
+ return (regs[reg] >> bit) & 0x01;
+#else
+ return 0;
+#endif
}
-static inline unsigned int cpuid_edx(unsigned int op) {
- int regs[4];
- cpuid_x86 (op, regs);
- return regs[3];
+//Returns 1 when eax of cpuid operation 0x80000000 is at least val, else 0.
+//Always returns 0 on non-x86 builds.
+static inline unsigned int check_extended_cpuid(unsigned int val) {
+#if !defined(VOLK_CPU_x86)
+ return 0;
+#else
+ unsigned int cpu_info[4];
+ cpuid_x86(0x80000000, cpu_info);
+ if (cpu_info[0] < val) return 0;
+ return 1;
+#endif
}
-static inline unsigned int xgetbv(void) {
+//Returns 1 when the OS has enabled AVX state, 0 otherwise.
+//Always returns 0 on non-x86 builds.
+static inline unsigned int get_avx_enabled(void) {
+#if defined(VOLK_CPU_x86)
//check to make sure that xgetbv is enabled in OS
- int xgetbv_enabled = cpuid_ecx(1) >> 27 & 0x01;
- if (xgetbv_enabled == 0) return 0;
+ if(!cpuid_x86_bit(2, 1, 27)) return 0;
- return __xgetbv() & 0x6;
+ //assuming __xgetbv() returns XCR0: bit 1 (SSE state) and bit 2 (AVX state)
+ //must BOTH be set; a plain "& 0x6" is also nonzero when only one of them is
+ return (__xgetbv() & 0x6) == 0x6;
-}
-
+#else
+ return 0;
#endif
+}
//neon detection is linux specific
#if defined(__arm__) && defined(__linux__)
@@ -140,51 +139,10 @@ static int has_ppc(void){
#for $arch in $archs
static int i_can_has_$arch.name (void) {
-########################################################################
- #if $arch.type == "x86" and $arch.no_test
-#if defined(VOLK_CPU_x86)
- return 1;
-#else
- return 0;
-#endif
-########################################################################
- #else if $arch.op == 1
-#if defined(VOLK_CPU_x86)
- #set $op = hex($arch.op)
- unsigned int e$(arch.reg)x = cpuid_e$(arch.reg)x ($op);
- unsigned int hwcap = ((e$(arch.reg)x >> $arch.shift) & 1) == $arch.val;
- #if $arch.check
- if ($(arch.check)() == 0) return 0;
- #end if
- return hwcap;
-#else
- return 0;
-#endif
-########################################################################
- #else if $arch.op == 0x80000001
-#if defined(VOLK_CPU_x86)
- #set $op = hex($arch.op)
- unsigned int extended_fct_count = cpuid_eax(0x80000000);
- if (extended_fct_count < 0x80000001)
- return $(arch.val)^1;
- unsigned int extended_features = cpuid_e$(arch.reg)x ($op);
- return ((extended_features >> $arch.shift) & 1) == $arch.val;
-#else
- return 0;
-#endif
-########################################################################
- #else if $arch.type == "powerpc"
- return has_ppc();
-########################################################################
- #else if $arch.type == "arm"
- return has_neon();
-########################################################################
- #else if $arch.type == "all"
+ #for $check, $params in $arch.checks
+ if ($(check)($(', '.join($params))) == 0) return 0;
+ #end for
return 1;
-########################################################################
- #else ##$
- return 0;
- #end if
}
#end for