Skip to content

Commit 69d9ef2

Browse files
authored
chore: Improve the implementation of simd based packing (dragonflydb#577)
Signed-off-by: Roman Gershman <[email protected]>
1 parent d11b0d1 commit 69d9ef2

File tree

4 files changed

+52
-2
lines changed

4 files changed

+52
-2
lines changed

src/core/compact_object.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ void CompactObj::SetString(std::string_view str) {
674674
}
675675

676676
tl.tmp_buf.resize(encode_len);
677-
detail::ascii_pack_simd(str.data(), str.size(), tl.tmp_buf.data());
677+
detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());
678678
encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};
679679

680680
if (encoded.size() <= kInlineLen) {

src/core/compact_object_test.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ TEST_F(CompactObjectTest, AsciiUtil) {
204204
}
205205
string act_str(data3.size(), 'y');
206206
std::vector<uint8_t> binvec(detail::binpacked_len(data3.size()));
207-
detail::ascii_pack_simd(data3.data(), data3.size(), binvec.data());
207+
detail::ascii_pack_simd2(data3.data(), data3.size(), binvec.data());
208208
detail::ascii_unpack_simd(binvec.data(), data3.size(), act_str.data());
209209

210210
ASSERT_EQ(data3, act_str);
@@ -546,6 +546,16 @@ static void BM_PackSimd(benchmark::State& state) {
546546
}
547547
BENCHMARK(BM_PackSimd);
548548

549+
// Benchmark: repeatedly packs a 1024-character string of 'a' with
// detail::ascii_pack_simd2 into a fixed stack buffer.
static void BM_PackSimd2(benchmark::State& state) {
  const string input(1024, 'a');
  uint8_t packed[1024];  // same size as the input; the packed output is written here.

  while (state.KeepRunning()) {
    detail::ascii_pack_simd2(input.data(), input.size(), packed);
  }
}
557+
BENCHMARK(BM_PackSimd2);
558+
549559
static void BM_UnpackNaive(benchmark::State& state) {
550560
string val(1024, 'a');
551561
uint8_t buf[1024];

src/core/detail/bitpacking.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,44 @@ void ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin) {
141141
ascii_pack(ascii, end - ascii, bin);
142142
}
143143

144+
void ascii_pack_simd2(const char* ascii, size_t len, uint8_t* bin) {
145+
// I leave out 16 bytes in addition to 16 that we load in the loop
146+
// because we store into bin full 16 bytes instead of 14. To prevent data
147+
// overwrite we finish loop one iteration earlier.
148+
const char* end = ascii + len - 32;
149+
150+
// Skips 8th byte (indexc 7) in the lower 8-byte part.
151+
const __m128i control = _mm_set_epi8(-1, -1, 14, 13, 12, 11, 10, 9, 8, 6, 5, 4, 3, 2, 1, 0);
152+
153+
__m128i val, rpart, lpart;
154+
155+
// Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111
156+
while (ascii <= end) {
157+
val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));
158+
159+
/*
160+
x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);
161+
x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);
162+
x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);
163+
*/
164+
val = _mm_maddubs_epi16(_mm_set1_epi16(0x8001), val);
165+
val = _mm_madd_epi16(_mm_set1_epi32(0x40000001), val);
166+
167+
rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));
168+
lpart = _mm_and_si128(val, _mm_set1_epi64x(0x0FFFFFFF00000000));
169+
val = _mm_or_si128(_mm_srli_epi64(lpart, 4), rpart);
170+
171+
val = _mm_shuffle_epi8(val, control);
172+
_mm_storeu_si128(reinterpret_cast<__m128i*>(bin), val);
173+
bin += 14;
174+
ascii += 16;
175+
}
176+
177+
end += 32; // Bring back end.
178+
DCHECK(ascii < end);
179+
ascii_pack(ascii, end - ascii, bin);
180+
}
181+
144182
// unpacks 8->7 encoded blob back to ascii.
145183
// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than
146184
// the source buffer.

src/core/detail/bitpacking.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ void ascii_pack(const char* ascii, size_t len, uint8_t* bin);
2626
void ascii_pack2(const char* ascii, size_t len, uint8_t* bin);
2727

2828
void ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin);
29+
// SIMD-based variant of ascii_pack_simd with the same signature: packs `len`
// characters from `ascii` into `bin`, 16 input bytes into 14 output bytes per step.
// NOTE(review): callers size `bin` with binpacked_len(len); input is presumably
// expected to be 7-bit ASCII - confirm against the implementation.
void ascii_pack_simd2(const char* ascii, size_t len, uint8_t* bin);
30+
2931
bool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len);
3032

3133
// maps ascii len to 7-bit packed length. Each 8 bytes are converted to 7 bytes.

0 commit comments

Comments
 (0)