summaryrefslogtreecommitdiff
path: root/volk/tmpl/volk_cpu.tmpl.c
blob: 81fc679cbcc1c027c367d7ac330067cd7304e0f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
/*
 * Copyright 2011-2012 Free Software Foundation, Inc.
 *
 * This file is part of GNU Radio
 *
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */

#include <volk/volk_cpu.h>
#include <volk/volk_config_fixed.h>
#include <stdlib.h>

struct VOLK_CPU volk_cpu;

#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
    #define VOLK_CPU_x86
#endif

#if defined(VOLK_CPU_x86)

//implement get cpuid for gcc compilers using a system or local copy of cpuid.h
#if defined(__GNUC__)
    #if defined(HAVE_CPUID_H)
        #include <cpuid.h>
    #else
        #include "gcc_x86_cpuid.h"
    #endif
    #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)

    /* Return Intel AVX extended CPU capabilities register.
     * This function will bomb on non-AVX-capable machines, so
     * check for AVX capability before executing.
     */
    #if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4
    static inline unsigned long long _xgetbv(unsigned int index){
        unsigned int eax, edx;
        __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
        return ((unsigned long long)edx << 32) | eax;
    }
    #define __xgetbv() _xgetbv(0)
    #else
    #define __xgetbv() 0
    #endif

//implement get cpuid for MSVC compilers using __cpuid intrinsic
#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
    #include <intrin.h>
    #define cpuid_x86(op, r) __cpuid(((int*)r), op)

    #if defined(_XCR_XFEATURE_ENABLED_MASK)
    #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
    #else
    #define __xgetbv() 0
    #endif

#else
    #error "A get cpuid for volk is not available on this compiler..."
#endif //defined(__GNUC__)

#endif //defined(VOLK_CPU_x86)

static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
#if defined(VOLK_CPU_x86)
    unsigned int regs[4];
    cpuid_x86(op, regs);
    return regs[reg] >> bit & 0x01;
#else
    return 0;
#endif
}

static inline unsigned int check_extended_cpuid(unsigned int val) {
#if defined(VOLK_CPU_x86)
    unsigned int regs[4];
    cpuid_x86(0x80000000, regs);
    return regs[0] >= val;
#else
    return 0;
#endif
}

static inline unsigned int get_avx_enabled(void) {
#if defined(VOLK_CPU_x86)
    return __xgetbv() & 0x6;
#else
    return 0;
#endif
}

//neon detection is linux specific
#if defined(__arm__) && defined(__linux__)
    #include <asm/hwcap.h>
    #include <linux/auxvec.h>
    #include <stdio.h>
    #define VOLK_CPU_ARM
#endif

static int has_neon(void){
#if defined(VOLK_CPU_ARM)
    FILE *auxvec_f;
    unsigned long auxvec[2];
    unsigned int found_neon = 0;
    auxvec_f = fopen("/proc/self/auxv", "rb");
    if(!auxvec_f) return 0;

    //so auxv is basically 32b of ID and 32b of value
    //so it goes like this
    while(!found_neon && auxvec_f) {
      fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
      if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
        found_neon = 1;
    }

    fclose(auxvec_f);
    return found_neon;
#else
    return 0;
#endif
}

static int has_ppc(void){
#ifdef __PPC__
    return 1;
#else
    return 0;
#endif
}

#for $arch in $archs
static int i_can_has_$arch.name (void) {
    #for $check, $params in $arch.checks
    if ($(check)($(', '.join($params))) == 0) return 0;
    #end for
    return 1;
}

#end for

#if defined(HAVE_FENV_H)
    #include <fenv.h>
    static inline void set_float_rounding(void){
        fesetround(FE_TONEAREST);
    }
#elif defined(_MSC_VER)
    #include <float.h>
    static inline void set_float_rounding(void){
        unsigned int cwrd;
        _controlfp_s(&cwrd, 0, 0);
        _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
    }
#else
    static inline void set_float_rounding(void){
        //do nothing
    }
#endif

void volk_cpu_init() {
    #for $arch in $archs
    volk_cpu.has_$arch.name = &i_can_has_$arch.name;
    #end for
    set_float_rounding();
}

unsigned int volk_get_lvarch() {
    unsigned int retval = 0;
    volk_cpu_init();
    #for $arch in $archs
    retval += volk_cpu.has_$(arch.name)() << LV_$(arch.name.upper());
    #end for
    return retval;
}