Skip to content

Commit 29a6bd3

Browse files
wu0607facebook-github-bot
authored andcommitted
Revert D72676728: Add writeVarintSve for aarch64
Differential Revision: D72676728 Original commit changeset: 430180214820 Original Phabricator Diff: D72676728 fbshipit-source-id: 456b3d0c70f44bb8df162592255ce6ab392e136b
1 parent 9e3c1e0 commit 29a6bd3

File tree

2 files changed

+12
-102
lines changed

2 files changed

+12
-102
lines changed

third-party/thrift/src/thrift/lib/cpp/util/VarintUtils-inl.h

Lines changed: 10 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,13 @@
5050
// apple silicon can run most x86-64 instructions, but not necessarily all
5151
#define THRIFT_UTIL_VARINTUTILS_BRANCH_FREE_ENCODER 1
5252
#elif defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE2_BITPERM) && \
53-
__has_include(<arm_neon_sve_bridge.h>) && !FOLLY_MOBILE
53+
__has_include(<arm_neon_sve_bridge.h>)
5454
#define THRIFT_UTIL_VARINTUTILS_BRANCH_FREE_ENCODER 1
5555
#else
5656
#define THRIFT_UTIL_VARINTUTILS_BRANCH_FREE_ENCODER 0
5757
#endif
5858

5959
#if THRIFT_UTIL_VARINTUTILS_BRANCH_FREE_ENCODER && FOLLY_AARCH64
60-
#include <arm_neon.h>
6160
#include <arm_neon_sve_bridge.h> // @manual
6261
#include <arm_sve.h>
6362
#endif
@@ -431,98 +430,20 @@ uint8_t writeVarintUnrolled(Cursor& c, T value) {
431430

432431
#if THRIFT_UTIL_VARINTUTILS_BRANCH_FREE_ENCODER
433432

434-
#if FOLLY_AARCH64
435-
436-
template <class Cursor, class T>
437-
uint8_t writeVarintSve(Cursor& c, T valueS) {
438-
auto value = folly::to_unsigned(valueS);
439-
if (FOLLY_LIKELY((value & ~0x7f) == 0)) {
440-
c.template write<uint8_t>(static_cast<uint8_t>(value));
441-
return 1;
442-
}
443-
444-
if constexpr (sizeof(T) == 1) {
445-
c.template write<uint16_t>(static_cast<uint16_t>(value | 0x100));
446-
return 2;
447-
}
448-
449-
enum { maxSize = (8 * sizeof(T) + 6) / 7 };
450-
c.ensure(maxSize);
451-
452-
svuint8_t bdepMask = svset_neonq_u8(svundef_u8(), vdupq_n_u8(0x7f));
453-
uint64x2_t clzMask = vreinterpretq_u64_u8(vdupq_n_u8(0xff));
454-
uint64x2_t vec;
455-
vec[0] = value;
456-
457-
vec = svget_neonq_u64(svbdep_u64(
458-
svset_neonq_u64(svundef_u64(), vec), svreinterpret_u64_u8(bdepMask)));
459-
460-
svuint64_t clzV;
461-
uint64x2_t clzMaskV;
462-
if constexpr (sizeof(T) == 2) {
463-
clzV = svset_neonq_u64(svundef_u64(), vclzq_u32(vec));
464-
clzMaskV = svget_neonq_u32(svlsr_u32_x(
465-
svptrue_b32(),
466-
svset_neonq_u32(svundef_u32(), vreinterpretq_u32_u64(clzMask)),
467-
svreinterpret_u32_u64(clzV)));
468-
} else {
469-
clzV = svclz_u64_x(svptrue_b64(), svset_neonq_u64(svundef_u64(), vec));
470-
clzMaskV = svget_neonq_u64(svlsr_u64_x(
471-
svptrue_b64(), svset_neonq_u64(svundef_u64(), clzMask), clzV));
472-
}
473-
474-
svuint64_t sizeSV = svlsr_n_u64_x(svptrue_b64(), clzV, 3);
475-
476-
if constexpr (sizeof(T) == 2) {
477-
sizeSV = svsubr_n_u64_x(svptrue_b64(), sizeSV, 4);
478-
} else {
479-
sizeSV = svsubr_n_u64_x(svptrue_b64(), sizeSV, 8);
480-
}
481-
482-
vec = svget_neonq_u8(svorr_n_u8_x(
483-
svptrue_b8(),
484-
svset_neonq_u8(svundef_u8(), vreinterpretq_u8_u64(vec)),
485-
0x80));
486-
487-
vec = vandq_u64(vec, clzMaskV);
488-
489-
if constexpr (sizeof(T) == 8) {
490-
uint64_t orMask = value < (1ull << 56) ? 0 : 0x80;
491-
uint64x2_t orMaskV = vreinterpretq_u64_u8(vdupq_n_u8(orMask));
492-
vec = vorrq_u64(vec, orMaskV);
493-
}
494-
495-
uint8_t* p = c.writableData();
496-
497-
if constexpr (sizeof(T) == sizeof(uint16_t)) {
498-
vst1q_lane_u16(p, vreinterpretq_u16_u64(vec), 0);
499-
vst1q_lane_u8(p + 2, vreinterpretq_u8_u64(vec), 2);
500-
} else if constexpr (sizeof(T) == sizeof(uint32_t)) {
501-
vst1q_lane_u32(p, vreinterpretq_u32_u64(vec), 0);
502-
vst1q_lane_u8(p + 4, vreinterpretq_u8_u64(vec), 4);
503-
} else {
504-
vst1_u8(p, vget_low_u64(vreinterpretq_u8_u64(vec)));
505-
p[8] = value >> 56;
506-
p[9] = value >> 63;
507-
}
508-
509-
uint8_t size = svget_neonq_u64(sizeSV)[0];
510-
if constexpr (sizeof(T) == 8) {
511-
size = value < (1ull << 56) ? size : (value >> 63) + 9;
512-
}
513-
514-
c.append(size);
515-
return size;
516-
}
517-
518-
#else
519-
520433
inline uint64_t compressBits(uint64_t value, uint64_t mask) {
434+
#if FOLLY_X64
521435
return _pdep_u64(value, mask);
436+
#elif FOLLY_AARCH64
437+
// See https://godbolt.org/z/nhc443acd
438+
const auto vec = svbdep_u64(svdup_n_u64(value), svdup_n_u64(mask));
439+
return vgetq_lane_u64(svget_neonq_u64(vec), 0);
440+
#else
441+
static_assert(0, "no pdep-equivalent instruction is available");
442+
#endif // __BMI2__, __ARM_FEATURE_SVE2_BITPERM
522443
}
523444

524445
template <class Cursor, class T>
525-
uint8_t writeVarintBranchFreeX86(Cursor& c, T valueS) {
446+
uint8_t writeVarintBranchFree(Cursor& c, T valueS) {
526447
auto value = folly::to_unsigned(valueS);
527448
if (FOLLY_LIKELY((value & ~0x7f) == 0)) {
528449
c.template write<uint8_t>(static_cast<uint8_t>(value));
@@ -573,17 +494,6 @@ uint8_t writeVarintBranchFreeX86(Cursor& c, T valueS) {
573494
return size;
574495
}
575496

576-
#endif
577-
578-
template <class Cursor, class T>
579-
uint8_t writeVarintBranchFree(Cursor& c, T valueS) {
580-
#if FOLLY_AARCH64
581-
return writeVarintSve(c, valueS);
582-
#else
583-
return writeVarintBranchFreeX86(c, valueS);
584-
#endif
585-
}
586-
587497
template <class Cursor, class T>
588498
uint8_t writeVarint(Cursor& c, T value) {
589499
return writeVarintBranchFree(c, value);

third-party/thrift/src/thrift/lib/cpp/util/test/VarintUtilsBench.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,8 @@ BENCHMARK_NAMED_PARAM(bench_read, u64_9b, u64_9b())
235235
BENCHMARK_NAMED_PARAM(bench_read, u64_10b, u64_10b())
236236

237237
BENCHMARK_NAMED_PARAM(bench_read, exponential_1b, exponential_1b())
238-
BENCHMARK_NAMED_PARAM(bench_read, exponential_2b, exponential_2b())
239-
BENCHMARK_NAMED_PARAM(bench_read, exponential_3b, exponential_3b())
238+
BENCHMARK_NAMED_PARAM(bench_read, exponential_2b, exponential_1b())
239+
BENCHMARK_NAMED_PARAM(bench_read, exponential_3b, exponential_1b())
240240

241241
int main(int argc, char** argv) {
242242
folly::Init init(&argc, &argv, true);

0 commit comments

Comments
 (0)