@@ -366,6 +366,10 @@ struct TL {
366
366
size_t small_str_bytes;
367
367
Huffman huff_keys, huff_string_values;
368
368
uint64_t huff_encode_total = 0 , huff_encode_success = 0 ; // success/total metrics.
369
+
370
// Selects the Huffman decoder for the given domain: returns huff_keys.decoder
// when huffman_domain equals CompactObj::HUFF_KEYS, and
// huff_string_values.decoder for any other value.
+ const HuffmanDecoder& GetHuffmanDecoder (uint8_t huffman_domain) const {
371
+ return huffman_domain == CompactObj::HUFF_KEYS ? huff_keys.decoder : huff_string_values.decoder ;
372
+ }
369
373
};
370
374
371
375
thread_local TL tl;
@@ -606,10 +610,9 @@ int RobjWrapper::ZsetAdd(double score, std::string_view ele, int in_flags, int*
606
610
bool gt = (in_flags & ZADD_IN_GT) != 0 ;
607
611
bool lt = (in_flags & ZADD_IN_LT) != 0 ;
608
612
609
- unsigned char * eptr;
610
613
uint8_t * lp = (uint8_t *)inner_obj_;
611
-
612
- if (( eptr = ZzlFind (lp, ele, &curscore)) != NULL ) {
614
+ uint8_t * eptr = ZzlFind (lp, ele, &curscore);
615
+ if (eptr != NULL ) {
613
616
/* NX? Return, same element already exists. */
614
617
if (nx) {
615
618
*out_flags |= ZADD_OUT_NOP;
@@ -774,8 +777,10 @@ CompactObj& CompactObj::operator=(CompactObj&& o) noexcept {
774
777
SetMeta (o.taglen_ , o.mask_ ); // Frees underlying resources if needed.
775
778
memcpy (&u_, &o.u_ , sizeof (u_));
776
779
780
+ tagbyte_ = o.tagbyte_ ;
781
+
777
782
// SetMeta deallocates the object and we only want to reset it.
778
- o.taglen_ = 0 ;
783
+ o.tagbyte_ = 0 ;
779
784
o.mask_ = 0 ;
780
785
781
786
return *this ;
@@ -1012,7 +1017,7 @@ void CompactObj::SetString(std::string_view str, bool is_key) {
1012
1017
}
1013
1018
}
1014
1019
1015
- EncodeString (str);
1020
+ EncodeString (str, is_key );
1016
1021
}
1017
1022
1018
1023
void CompactObj::ReserveString (size_t size) {
@@ -1141,7 +1146,8 @@ void CompactObj::GetString(char* dest) const {
1141
1146
next += slices[0 ].size () - 1 ;
1142
1147
memcpy (next, slices[1 ].data (), slices[1 ].size ());
1143
1148
string_view src (reinterpret_cast <const char *>(tl.tmp_buf .data ()), tl.tmp_buf .size ());
1144
- CHECK (tl.huff_keys .decoder .Decode (src, decoded_len, dest));
1149
+ const auto & decoder = tl.GetHuffmanDecoder (huffman_domain_);
1150
+ CHECK (decoder.Decode (src, decoded_len, dest));
1145
1151
return ;
1146
1152
}
1147
1153
@@ -1237,15 +1243,15 @@ void CompactObj::Materialize(std::string_view blob, bool is_raw) {
1237
1243
u_.r_obj .SetString (blob, tl.local_mr );
1238
1244
}
1239
1245
} else {
1240
- EncodeString (blob);
1246
+ EncodeString (blob, false );
1241
1247
}
1242
1248
}
1243
1249
1244
1250
void CompactObj::Reset () {
1245
1251
if (HasAllocated ()) {
1246
1252
Free ();
1247
1253
}
1248
- taglen_ = 0 ;
1254
+ tagbyte_ = 0 ;
1249
1255
mask_ = 0 ;
1250
1256
}
1251
1257
@@ -1355,7 +1361,8 @@ bool CompactObj::CmpEncoded(string_view sv) const {
1355
1361
constexpr size_t kMaxHuffLen = kInlineLen * 3 ;
1356
1362
if (sz <= kMaxHuffLen ) {
1357
1363
char buf[kMaxHuffLen ];
1358
- CHECK (tl.huff_keys .decoder .Decode ({u_.inline_str + 1 , size_t (taglen_ - 1 )}, sz, buf));
1364
+ const auto & decoder = tl.GetHuffmanDecoder (huffman_domain_);
1365
+ CHECK (decoder.Decode ({u_.inline_str + 1 , size_t (taglen_ - 1 )}, sz, buf));
1359
1366
return sv == string_view (buf, sz);
1360
1367
}
1361
1368
}
@@ -1437,7 +1444,7 @@ bool CompactObj::CmpEncoded(string_view sv) const {
1437
1444
return false ;
1438
1445
}
1439
1446
1440
- void CompactObj::EncodeString (string_view str) {
1447
+ void CompactObj::EncodeString (string_view str, bool is_key ) {
1441
1448
DCHECK_GT (str.size (), kInlineLen );
1442
1449
DCHECK_EQ (NONE_ENC, mask_bits_.encoding );
1443
1450
@@ -1447,6 +1454,7 @@ void CompactObj::EncodeString(string_view str) {
1447
1454
// We chose such length that we can store the decoded length delta into 1 byte.
1448
1455
// The maximum huffman compression is 1/8, so 288 / 8 = 36.
1449
1456
// 288 - 36 = 252, which is smaller than 256.
1457
+ // TODO: introduce variable length huffman length.
1450
1458
constexpr unsigned kMaxHuffLen = 288 ;
1451
1459
1452
1460
// For sizes 17, 18 we would like to test ascii encoding first as it's more efficient.
@@ -1455,34 +1463,38 @@ void CompactObj::EncodeString(string_view str) {
1455
1463
kUseAsciiEncoding && str.size () < 19 && detail::validate_ascii_fast (str.data (), str.size ());
1456
1464
1457
1465
// if !is_ascii, we try huffman encoding next.
1458
- if (!is_ascii && str.size () <= kMaxHuffLen && tl.huff_keys .encoder .valid ()) {
1459
- unsigned dest_len = tl.huff_keys .encoder .CompressedBound (str.size ());
1460
- // 1 byte for storing the size delta.
1461
- tl.tmp_buf .resize (1 + dest_len);
1462
- string err_msg;
1463
- ++tl.huff_encode_total ;
1464
- bool res = tl.huff_keys .encoder .Encode (str, tl.tmp_buf .data () + 1 , &dest_len, &err_msg);
1465
- if (res) {
1466
- // we accept huffman encoding only if it is:
1467
- // 1. smaller than the original string by 20%
1468
- // 2. allows us to store the encoded string in the inline buffer
1469
- if (dest_len && (dest_len < kInlineLen || (dest_len + dest_len / 5 ) < str.size ())) {
1470
- huff_encoded = true ;
1471
- tl.huff_encode_success ++;
1472
- encoded = string_view{reinterpret_cast <char *>(tl.tmp_buf .data ()), dest_len + 1 };
1473
- unsigned delta = str.size () - dest_len;
1474
- DCHECK_LT (delta, 256u );
1475
- tl.tmp_buf [0 ] = static_cast <uint8_t >(delta);
1476
- mask_bits_.encoding = HUFFMAN_ENC;
1477
- if (encoded.size () <= kInlineLen ) {
1478
- SetMeta (encoded.size (), mask_);
1479
- memcpy (u_.inline_str , tl.tmp_buf .data (), encoded.size ());
1480
- return ;
1466
+ if (!is_ascii && str.size () <= kMaxHuffLen ) {
1467
+ auto & huffman = is_key ? tl.huff_keys : tl.huff_string_values ;
1468
+ if (huffman.encoder .valid ()) {
1469
+ unsigned dest_len = huffman.encoder .CompressedBound (str.size ());
1470
+ // 1 byte for storing the size delta.
1471
+ tl.tmp_buf .resize (1 + dest_len);
1472
+ string err_msg;
1473
+ ++tl.huff_encode_total ;
1474
+ bool res = huffman.encoder .Encode (str, tl.tmp_buf .data () + 1 , &dest_len, &err_msg);
1475
+ if (res) {
1476
+ // we accept huffman encoding only if it is:
1477
+ // 1. smaller than the original string by 20%
1478
+ // 2. allows us to store the encoded string in the inline buffer
1479
+ if (dest_len && (dest_len < kInlineLen || (dest_len + dest_len / 5 ) < str.size ())) {
1480
+ huff_encoded = true ;
1481
+ tl.huff_encode_success ++;
1482
+ encoded = string_view{reinterpret_cast <char *>(tl.tmp_buf .data ()), dest_len + 1 };
1483
+ unsigned delta = str.size () - dest_len;
1484
+ DCHECK_LT (delta, 256u );
1485
+ tl.tmp_buf [0 ] = static_cast <uint8_t >(delta);
1486
+ mask_bits_.encoding = HUFFMAN_ENC;
1487
+ huffman_domain_ = is_key ? HUFF_KEYS : HUFF_STRING_VALUES;
1488
+ if (encoded.size () <= kInlineLen ) {
1489
+ SetMeta (encoded.size (), mask_);
1490
+ memcpy (u_.inline_str , tl.tmp_buf .data (), encoded.size ());
1491
+ return ;
1492
+ }
1481
1493
}
1494
+ } else {
1495
+ // Should not happen, means we have an internal bug.
1496
+ LOG (DFATAL) << " Failed to encode string with huffman: " << err_msg;
1482
1497
}
1483
- } else {
1484
- // Should not happen, means we have an internal buf.
1485
- LOG (DFATAL) << " Failed to encode string with huffman: " << err_msg;
1486
1498
}
1487
1499
}
1488
1500
@@ -1609,9 +1621,11 @@ size_t CompactObj::StrEncoding::Decode(std::string_view blob, char* dest) const
1609
1621
case ASCII2_ENC:
1610
1622
detail::ascii_unpack (reinterpret_cast <const uint8_t *>(blob.data ()), decoded_len, dest);
1611
1623
break ;
1612
- case HUFFMAN_ENC:
1613
- tl.huff_keys .decoder .Decode (blob.substr (1 ), decoded_len, dest);
1624
+ case HUFFMAN_ENC: {
1625
+ const auto & decoder = tl.GetHuffmanDecoder (is_key_);
1626
+ decoder.Decode (blob.substr (1 ), decoded_len, dest);
1614
1627
break ;
1628
+ }
1615
1629
};
1616
1630
return decoded_len;
1617
1631
}
0 commit comments