1818
1919#include < glog/logging.h>
2020
21+ #ifdef _MSC_VER
22+ #include < immintrin.h>
23+ #endif
24+
2125#include < string_view>
2226
2327#include < folly/CpuId.h>
2428#include < folly/Portability.h>
2529#include < folly/lang/Assume.h>
2630#include < folly/portability/Builtins.h>
2731
28- #if FOLLY_X64 || defined(__i386__)
29- #include < immintrin.h>
30- #endif
31-
3232namespace folly {
3333namespace compression {
3434namespace instructions {
@@ -93,8 +93,15 @@ struct Nehalem : public Default {
9393 }
9494
9595 static FOLLY_ALWAYS_INLINE uint64_t popcount (uint64_t value) {
96- // POPCNT is supported starting with Intel Nehalem, AMD K10.
96+ // POPCNT is supported starting with Intel Nehalem, AMD K10.
97+ #if defined(__GNUC__)
98+ // GCC and Clang won't inline the intrinsics.
99+ uint64_t result;
100+ asm (" popcntq %1, %0" : " =r" (result) : " r" (value));
101+ return result;
102+ #else
97103 return uint64_t (_mm_popcnt_u64 (value));
104+ #endif
98105 }
99106};
100107
@@ -106,18 +113,45 @@ struct Haswell : public Nehalem {
106113 }
107114
108115 static FOLLY_ALWAYS_INLINE uint64_t blsr (uint64_t value) {
109- // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
110- // BLSR combines two instructions into one and reduces register pressure.
116+ // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
117+ // BLSR combines two instructions into one and reduces register pressure.
118+ #if defined(__GNUC__)
119+ // GCC and Clang won't inline the intrinsics.
120+ uint64_t result;
121+ asm (" blsrq %1, %0" : " =r" (result) : " r" (value));
122+ return result;
123+ #else
111124 return _blsr_u64 (value);
125+ #endif
112126 }
113127
114128 static FOLLY_ALWAYS_INLINE uint64_t
115129 bextr (uint64_t value, uint32_t start, uint32_t length) {
130+ #if defined(__GNUC__)
131+ // GCC and Clang won't inline the intrinsics.
132+ // Encode parameters in `pattern` where `pattern[0:7]` is `start` and
133+ // `pattern[8:15]` is `length`.
134+ // Ref: Intel Advanced Vector Extensions Programming Reference
135+ uint64_t pattern = start & 0xFF ;
136+ pattern = pattern | ((length & 0xFF ) << 8 );
137+ uint64_t result;
138+ asm (" bextrq %2, %1, %0" : " =r" (result) : " r" (value), " r" (pattern));
139+ return result;
140+ #else
116141 return _bextr_u64 (value, start, length);
142+ #endif
117143 }
118144
119145 static FOLLY_ALWAYS_INLINE uint64_t bzhi (uint64_t value, uint32_t index) {
146+ #if defined(__GNUC__)
147+ // GCC and Clang won't inline the intrinsics.
148+ const uint64_t index64 = index;
149+ uint64_t result;
150+ asm (" bzhiq %2, %1, %0" : " =r" (result) : " r" (value), " r" (index64));
151+ return result;
152+ #else
120153 return _bzhi_u64 (value, index);
154+ #endif
121155 }
122156};
123157#endif
0 commit comments