summaryrefslogtreecommitdiff
path: root/volk/include/volk/volk_32u_popcnt_aligned16.h
blob: 37cfd112c01923586372a4326d90c39b4a0be610 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#ifndef INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H
#define INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H

#include <stdio.h>
#include <inttypes.h>


#if LV_HAVE_GENERIC

/*
 * Portable population count: stores the number of 1 bits in `value`
 * (a result in the range 0..32) through `ret`.
 *
 * The count is built by adding neighbouring bit fields in parallel inside
 * a single 32-bit word. Field width doubles at each step (1 -> 2 -> 4 -> 8
 * bits); the four per-byte totals are then folded into the low byte.
 *
 * ret:   destination for the count; dereferenced unconditionally.
 * value: the 32-bit word to examine.
 */
static inline void volk_32u_popcnt_aligned16_generic(uint32_t* ret, const uint32_t value) {
  uint32_t count = value;

  // 16 two-bit fields, each holding the number of set bits in its own pair.
  count = ((count >> 1) & 0x55555555) + (count & 0x55555555);

  // 8 four-bit fields, each holding the number of set bits in its own nibble.
  count = ((count >> 2) & 0x33333333) + (count & 0x33333333);

  // 4 eight-bit fields. Two nibble counts sum to at most 8, which still fits
  // in four bits, so the mask can be applied after the addition.
  count += count >> 4;
  count &= 0x0F0F0F0F;

  // Fold the byte totals downward; only the low byte receives the full sum.
  count += count >> 8;
  count += count >> 16;

  // The total never exceeds 32, so six bits suffice. The mask also drops the
  // partial sums left behind in the upper bytes.
  *ret = count & 0x0000003F;
}

#endif /*LV_HAVE_GENERIC*/

#if LV_HAVE_SSE4_2

#include <nmmintrin.h>

/*
 * SSE4.2 variant of the 32-bit population count.
 *
 * Hands `value` to the _mm_popcnt_u32 intrinsic (declared in <nmmintrin.h>)
 * and stores the intrinsic's result through `ret`.
 *
 * ret:   destination for the bit count; dereferenced unconditionally, so it
 *        must point to writable storage.
 * value: the 32-bit word whose set bits are counted.
 */
static inline void volk_32u_popcnt_aligned16_sse4_2(uint32_t* ret, const uint32_t value) {
  *ret = _mm_popcnt_u32(value);
}

#endif /*LV_HAVE_SSE4_2*/

#endif /*INCLUDED_VOLK_32u_POPCNT_ALIGNED16_H*/