@@ -786,9 +786,7 @@ size_t CompactObj::Size() const {
786
786
switch (taglen_) {
787
787
case SMALL_TAG:
788
788
raw_size = u_.small_str .size ();
789
- if (mask_bits_.encoding == HUFFMAN_ENC) {
790
- return DecodedLen (raw_size, u_.small_str .first_byte ());
791
- }
789
+ first_byte = u_.small_str .first_byte ();
792
790
break ;
793
791
case INT_TAG: {
794
792
absl::AlphaNum an (u_.ival );
@@ -801,9 +799,7 @@ size_t CompactObj::Size() const {
801
799
break ;
802
800
case ROBJ_TAG:
803
801
raw_size = u_.r_obj .Size ();
804
- if (mask_bits_.encoding == HUFFMAN_ENC) {
805
- return DecodedLen (raw_size, *(uint8_t *)u_.r_obj .inner_obj ());
806
- }
802
+ first_byte = *(uint8_t *)u_.r_obj .inner_obj ();
807
803
break ;
808
804
case JSON_TAG:
809
805
DCHECK_EQ (mask_bits_.encoding , NONE_ENC);
@@ -821,7 +817,7 @@ size_t CompactObj::Size() const {
821
817
LOG (DFATAL) << " Should not reach " << int (taglen_);
822
818
}
823
819
}
824
- return mask_bits_. encoding ? DecodedLen (raw_size, first_byte) : raw_size ;
820
+ return GetStrEncoding (). DecodedSize (raw_size, first_byte);
825
821
}
826
822
827
823
uint64_t CompactObj::HashCode () const {
@@ -848,16 +844,8 @@ uint64_t CompactObj::HashCode() const {
848
844
849
845
if (IsInline ()) {
850
846
char buf[kInlineLen * 3 ]; // should suffice for most huffman decodings.
851
- size_t decoded_len = DecodedLen (taglen_, u_.inline_str [0 ]);
852
- if (mask_bits_.encoding == HUFFMAN_ENC) {
853
- if (decoded_len <= sizeof (buf) &&
854
- tl.huff_keys .decoder .Decode ({u_.inline_str + 1 , size_t (taglen_ - 1 )}, decoded_len, buf)) {
855
- return XXH3_64bits_withSeed (buf, decoded_len, kHashSeed );
856
- }
857
- } else {
858
- detail::ascii_unpack (to_byte (u_.inline_str ), decoded_len, buf);
859
- return XXH3_64bits_withSeed (buf, decoded_len, kHashSeed );
860
- }
847
+ size_t decoded_len = GetStrEncoding ().Decode (string_view{u_.inline_str , taglen_}, buf);
848
+ return XXH3_64bits_withSeed (buf, decoded_len, kHashSeed );
861
849
}
862
850
863
851
string_view sv = GetSlice (&tl.tmp_str );
@@ -1114,21 +1102,7 @@ void CompactObj::GetString(char* dest) const {
1114
1102
CHECK (!IsExternal ());
1115
1103
1116
1104
if (IsInline ()) {
1117
- switch (mask_bits_.encoding ) {
1118
- case ASCII2_ENC:
1119
- DCHECK_EQ (taglen_ + 2u , ascii_len (taglen_));
1120
- detail::ascii_unpack (to_byte (u_.inline_str ), taglen_ + 2 , dest);
1121
- break ;
1122
- case HUFFMAN_ENC:
1123
- tl.huff_keys .decoder .Decode ({u_.inline_str + 1 , size_t (taglen_ - 1 )},
1124
- u_.inline_str [0 ] + taglen_ - 1 , dest);
1125
- break ;
1126
- case NONE_ENC:
1127
- memcpy (dest, u_.inline_str , taglen_);
1128
- break ;
1129
- default :
1130
- DLOG (FATAL) << " should not reach " << int (mask_bits_.encoding );
1131
- }
1105
+ GetStrEncoding ().Decode ({u_.inline_str , taglen_}, dest);
1132
1106
return ;
1133
1107
}
1134
1108
@@ -1142,19 +1116,15 @@ void CompactObj::GetString(char* dest) const {
1142
1116
if (taglen_ == ROBJ_TAG) {
1143
1117
CHECK_EQ (OBJ_STRING, u_.r_obj .type ());
1144
1118
DCHECK_EQ (OBJ_ENCODING_RAW, u_.r_obj .encoding ());
1145
- size_t decoded_len = DecodedLen (u_.r_obj .Size (), *(const uint8_t *)u_.r_obj .inner_obj ());
1146
- if (mask_bits_.encoding == HUFFMAN_ENC) {
1147
- CHECK (tl.huff_keys .decoder .Decode (
1148
- {(const char *)u_.r_obj .inner_obj () + 1 , u_.r_obj .Size () - 1 }, decoded_len, dest));
1149
- return ;
1150
- }
1151
- detail::ascii_unpack_simd (to_byte (u_.r_obj .inner_obj ()), decoded_len, dest);
1119
+ string_view blob{(const char *)u_.r_obj .inner_obj (), u_.r_obj .Size ()};
1120
+ GetStrEncoding ().Decode (blob, dest);
1121
+ return ;
1152
1122
} else {
1153
1123
CHECK_EQ (SMALL_TAG, taglen_);
1154
1124
string_view slices[2 ];
1155
1125
unsigned num = u_.small_str .GetV (slices);
1156
1126
DCHECK_EQ (2u , num);
1157
- size_t decoded_len = DecodedLen (u_.small_str .size (), slices[0 ][0 ]);
1127
+ size_t decoded_len = GetStrEncoding (). DecodedSize (u_.small_str .size (), slices[0 ][0 ]);
1158
1128
1159
1129
if (mask_bits_.encoding == HUFFMAN_ENC) {
1160
1130
tl.tmp_buf .resize (slices[0 ].size () + slices[1 ].size () - 1 );
@@ -1575,15 +1545,6 @@ StringOrView CompactObj::GetRawString() const {
1575
1545
return {};
1576
1546
}
1577
1547
1578
- size_t CompactObj::DecodedLen (size_t sz, uint8_t b) const {
1579
- DCHECK (mask_bits_.encoding );
1580
- if (mask_bits_.encoding == HUFFMAN_ENC) {
1581
- return sz + b - 1 ;
1582
- }
1583
- unsigned delta = (mask_bits_.encoding == ASCII1_ENC) ? 1 : 0 ;
1584
- return ascii_len (sz) - delta;
1585
- }
1586
-
1587
1548
MemoryResource* CompactObj::memory_resource () {
1588
1549
return tl.local_mr ;
1589
1550
}
@@ -1613,4 +1574,52 @@ CompactObjType ObjTypeFromString(std::string_view sv) {
1613
1574
return kInvalidCompactObjType ;
1614
1575
}
1615
1576
1577
+ size_t CompactObj::StrEncoding::DecodedSize (string_view blob) const {
1578
+ return DecodedSize (blob.size (), blob[0 ]);
1579
+ }
1580
+
1581
+ size_t CompactObj::StrEncoding::DecodedSize (size_t blob_size, uint8_t first_byte) const {
1582
+ switch (enc_) {
1583
+ case NONE_ENC:
1584
+ return blob_size;
1585
+ case ASCII1_ENC:
1586
+ case ASCII2_ENC:
1587
+ return ascii_len (blob_size) - (enc_ == ASCII1_ENC);
1588
+ case HUFFMAN_ENC:
1589
+ return blob_size + int (first_byte) - 1 ;
1590
+ };
1591
+ return 0 ;
1592
+ }
1593
+
1594
+ size_t CompactObj::StrEncoding::Decode (std::string_view blob, char * dest) const {
1595
+ size_t decoded_len = DecodedSize (blob);
1596
+ switch (enc_) {
1597
+ case NONE_ENC:
1598
+ memcpy (dest, blob.data (), blob.size ());
1599
+ break ;
1600
+ case ASCII1_ENC:
1601
+ case ASCII2_ENC:
1602
+ detail::ascii_unpack (reinterpret_cast <const uint8_t *>(blob.data ()), decoded_len, dest);
1603
+ break ;
1604
+ case HUFFMAN_ENC:
1605
+ tl.huff_keys .decoder .Decode (blob.substr (1 ), decoded_len, dest);
1606
+ break ;
1607
+ };
1608
+ return decoded_len;
1609
+ }
1610
+
1611
+ StringOrView CompactObj::StrEncoding::Decode (std::string_view blob) const {
1612
+ switch (enc_) {
1613
+ case NONE_ENC:
1614
+ return StringOrView::FromView (blob);
1615
+ default : {
1616
+ string out;
1617
+ out.resize (DecodedSize (blob));
1618
+ Decode (blob, out.data ());
1619
+ return StringOrView::FromString (std::move (out));
1620
+ }
1621
+ }
1622
+ return {};
1623
+ }
1624
+
1616
1625
} // namespace dfly
0 commit comments