-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Open
Description
LLVM makes a valiant effort at unrolling and vectorizing these loops, but they are really just popcount, and it should recognize them as such:
#include <stdint.h>
// Short aliases for the fixed-width unsigned integer types from <stdint.h>;
// the templates and the extern "C" wrappers below are written in terms of these.
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
// Counts the set bits of `x` by testing every bit position individually.
//
// The number of positions examined is sizeof(T) * 8, and each position is
// probed with a single-bit u64 mask. The per-bit test-and-increment shape is
// kept deliberately: this is the loop the report above is about.
//
// Returns the number of probed positions at which `x` has a 1 bit.
template <typename T>
auto src(T x) -> u64 {
  constexpr u64 kBitWidth = sizeof(T) * 8;

  u64 set_bits = 0;
  for (u64 bit = 0; bit < kBitWidth; ++bit) {
    const u64 mask = static_cast<u64>(1) << bit;
    if ((x & mask) != 0) {
      ++set_bits;
    }
  }
  return set_bits;
}
// Population count of `x` via the compiler's type-generic builtin.
//
// This is the direct "just popcount" formulation that the loop in src() is
// being compared against. The builtin's result is widened to u64 so both
// templates share the same return type.
template <typename T>
auto tgt(T x) -> u64 {
  const auto set_bits = __builtin_popcountg(x);
  return static_cast<u64>(set_bits);
}
// C-linkage entry points that instantiate src() and tgt() once per integer
// width (8/16/32/64 bits), so each instantiation is emitted under a plain,
// unmangled symbol name.
extern "C" {

// Loop-based bit count, one wrapper per width.
auto src8(u8 x) -> u64 { return src<u8>(x); }
auto src16(u16 x) -> u64 { return src<u16>(x); }
auto src32(u32 x) -> u64 { return src<u32>(x); }
auto src64(u64 x) -> u64 { return src<u64>(x); }

// Builtin-based bit count, one wrapper per width.
auto tgt8(u8 x) -> u64 { return tgt<u8>(x); }
auto tgt16(u16 x) -> u64 { return tgt<u16>(x); }
auto tgt32(u32 x) -> u64 { return tgt<u32>(x); }
auto tgt64(u64 x) -> u64 { return tgt<u64>(x); }

}  // extern "C"