/* blob: 37cfd112c01923586372a4326d90c39b4a0be610 (plain) */
#ifndef INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H
#define INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H
#include <stdio.h>
#include <inttypes.h>
#if LV_HAVE_GENERIC
/**
 * Count the bits set to 1 in a 32-bit word (portable implementation).
 *
 * Uses a branch-free, bit-parallel reduction: neighbouring bit fields are
 * summed in place, doubling the field width at each step (1 -> 2 -> 4 -> 8
 * bits), and the four per-byte counts are then folded into the low byte.
 * The original author notes this is faster than a lookup table.
 *
 * @param ret   Output location; receives the number of set bits (0..32).
 * @param value Word whose set bits are counted.
 */
static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32_t value) {
  const uint32_t odd_even_mask = 0x55555555; /* every other bit            */
  const uint32_t pair_mask     = 0x33333333; /* low two bits of each nibble */
  const uint32_t nibble_mask   = 0x0F0F0F0F; /* low nibble of each byte     */
  const uint32_t result_mask   = 0x0000003F; /* six bits hold 0..32         */

  uint32_t count = value;

  /* Each 2-bit field now holds the number of set bits it contained. */
  count = (count & odd_even_mask) + ((count >> 1) & odd_even_mask);
  /* Each 4-bit field now holds the count for its four source bits. */
  count = (count & pair_mask) + ((count >> 2) & pair_mask);
  /* Each byte now holds the count for its eight source bits (max 8). */
  count = (count + (count >> 4)) & nibble_mask;

  /* Fold the four byte counts into the lowest byte; upper bytes collect
   * partial sums that are discarded by the final mask. */
  count += count >> 8;
  count += count >> 16;

  *ret = count & result_mask;
}
#endif /*LV_HAVE_GENERIC*/
#if LV_HAVE_SSE4_2
#include <nmmintrin.h>
/**
 * Count the bits set to 1 in a 32-bit word using the `_mm_popcnt_u32`
 * intrinsic.
 *
 * @param ret   Output location; receives the value returned by the intrinsic.
 * @param value Word whose set bits are counted.
 */
static inline void volk_32u_popcnt_aligned16_sse4_2(uint32_t* ret, const uint32_t value) {
  const uint32_t set_bits = (uint32_t)_mm_popcnt_u32(value);
  *ret = set_bits;
}
#endif /*LV_HAVE_SSE4_2*/
#endif /*INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H*/
|