summary | refs | log | tree | commit | diff
path: root/volk/include/volk/volk_64u_popcnt_aligned16.h
diff options
context:
space:
mode:
Diffstat (limited to 'volk/include/volk/volk_64u_popcnt_aligned16.h')
-rw-r--r-- volk/include/volk/volk_64u_popcnt_aligned16.h 74
1 files changed, 74 insertions, 0 deletions
diff --git a/volk/include/volk/volk_64u_popcnt_aligned16.h b/volk/include/volk/volk_64u_popcnt_aligned16.h
new file mode 100644
index 000000000..e8e1396e7
--- /dev/null
+++ b/volk/include/volk/volk_64u_popcnt_aligned16.h
@@ -0,0 +1,74 @@
+#ifndef INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H
+#define INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H
+
+#include <stdio.h>
+#include <inttypes.h>
+
+
+#if LV_HAVE_GENERIC
+
+
+/*
+ * Counts the bits that are set in a 64-bit word (population count).
+ *
+ * ret   - output; receives the number of 1 bits in value (0..64).
+ * value - the 64-bit word to examine.
+ *
+ * The word is processed as two 32-bit halves. Each half is reduced with a
+ * branch-free parallel bit count: adjacent 1-bit fields are summed into
+ * 2-bit fields, then 4-bit, then 8-bit fields, and finally the four byte
+ * sums are folded into the low six bits (a 32-bit half has at most 32 set
+ * bits, which fits in the 0x3F mask).
+ */
+static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64_t value) {
+  uint64_t total = 0;
+  unsigned int half;
+
+  for(half = 0; half < 2; half++) {
+    /*
+     * Extract the half with a shift instead of reading value through a
+     * uint32_t pointer: that cast breaks C's effective-type (strict
+     * aliasing) rule and is undefined behaviour. The sum of both halves is
+     * the same regardless of byte order.
+     */
+    uint32_t bits = (uint32_t)(value >> (32 * half));
+
+    bits = (bits & 0x55555555) + ((bits >> 1) & 0x55555555); /* 2-bit sums */
+    bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333); /* 4-bit sums */
+    bits = (bits + (bits >> 4)) & 0x0F0F0F0F;                /* 8-bit sums */
+    bits = bits + (bits >> 8);
+    bits = (bits + (bits >> 16)) & 0x0000003F;               /* total for this half */
+
+    total += bits;
+  }
+
+  *ret = total;
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+#if LV_HAVE_SSE4_2
+
+#include <nmmintrin.h>
+
+/*
+ * Counts the bits that are set in a 64-bit word (population count),
+ * SSE4.2 variant.
+ *
+ * ret   - output; receives the number of 1 bits in value (0..64).
+ * value - the 64-bit word to examine.
+ *
+ * When LV_64 is non-zero the count comes from the _mm_popcnt_u64 intrinsic.
+ * Otherwise a software fallback processes the word as two 32-bit halves,
+ * each reduced with a branch-free parallel bit count (2-bit, 4-bit and
+ * 8-bit partial sums folded into the low six bits).
+ */
+static inline void volk_64u_popcnt_aligned16_sse4_2(uint64_t* ret, const uint64_t value) {
+#if LV_64
+  *ret = _mm_popcnt_u64(value);
+#else
+  uint64_t total = 0;
+  unsigned int half;
+
+  for(half = 0; half < 2; half++) {
+    /*
+     * Extract the half with a shift instead of reading value through a
+     * uint32_t pointer: that cast breaks C's effective-type (strict
+     * aliasing) rule and is undefined behaviour. The sum of both halves is
+     * the same regardless of byte order.
+     */
+    uint32_t bits = (uint32_t)(value >> (32 * half));
+
+    bits = (bits & 0x55555555) + ((bits >> 1) & 0x55555555); /* 2-bit sums */
+    bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333); /* 4-bit sums */
+    bits = (bits + (bits >> 4)) & 0x0F0F0F0F;                /* 8-bit sums */
+    bits = bits + (bits >> 8);
+    bits = (bits + (bits >> 16)) & 0x0000003F;               /* total for this half */
+
+    total += bits;
+  }
+
+  *ret = total;
+#endif
+}
+
+#endif /*LV_HAVE_SSE4_2*/
+
+#endif /*INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H*/