Skip to content

Commit 2e3ebec

Browse files
committed
rearrange key serialization
Signed-off-by: zhengyu <zhangzhengyu@selectdb.com>
1 parent 387fdfc commit 2e3ebec

File tree

2 files changed: 102 additions and 55 deletions.

be/src/io/cache/cache_block_meta_store.cpp

Lines changed: 91 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,10 @@
3131
#include <sstream>
3232

3333
#include "common/status.h"
34+
#include "olap/field.h"
35+
#include "olap/field.h" // For OLAP_FIELD_TYPE_BIGINT
36+
#include "olap/key_coder.h"
37+
#include "olap/olap_common.h"
3438
#include "util/threadpool.h"
3539
#include "vec/common/hex.h"
3640

@@ -155,8 +159,11 @@ std::optional<BlockMeta> CacheBlockMetaStore::get(const BlockMetaKey& key) {
155159
}
156160

157161
std::unique_ptr<BlockMetaIterator> CacheBlockMetaStore::range_get(int64_t tablet_id) {
158-
// we trade accurate for clean code. so we ignore pending operations in the write queue
159-
std::string prefix = std::to_string(tablet_id) + "_";
162+
// Generate prefix using new serialization format
163+
std::string prefix;
164+
prefix.push_back(0x1); // version byte
165+
auto* tablet_id_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_BIGINT);
166+
tablet_id_coder->full_encode_ascending(&tablet_id, &prefix);
160167

161168
class RocksDBIterator : public BlockMetaIterator {
162169
public:
@@ -167,30 +174,48 @@ std::unique_ptr<BlockMetaIterator> CacheBlockMetaStore::range_get(int64_t tablet
167174

168175
// Releases the wrapped iterator by deleting _iter.
~RocksDBIterator() override {
    delete _iter;
}
169176

170-
bool valid() const override { return _iter->Valid() && _iter->key().starts_with(_prefix); }
177+
bool valid() const override {
178+
if (!_iter->Valid()) return false;
179+
Slice key_slice(_iter->key().data(), _prefix.size());
180+
return key_slice.compare(Slice(_prefix)) == 0;
181+
}
171182

172183
// Steps the wrapped iterator forward by calling its Next().
void next() override {
    _iter->Next();
}
173184

174185
// Decodes the key at the iterator's current position into a BlockMetaKey.
//
// The bytes are handed to deserialize_key() rather than being decoded inline,
// so this iterator and every other caller of deserialize_key() agree on the
// key layout and there is only one copy of the decoding steps to maintain.
// A key that deserialize_key() rejects (bad version byte or a field that fails
// to decode) comes back as a default-constructed BlockMetaKey.
BlockMetaKey key() const override {
    return deserialize_key(_iter->key().ToString());
}
195220

196221
BlockMeta value() const override {
@@ -233,23 +258,7 @@ std::unique_ptr<BlockMetaIterator> CacheBlockMetaStore::get_all() {
233258

234259
// Converts the key at the iterator's current position into a BlockMetaKey by
// passing its bytes to deserialize_key().
BlockMetaKey key() const override {
    return deserialize_key(_iter->key().ToString());
}
254263

255264
BlockMeta value() const override {
@@ -375,7 +384,24 @@ void CacheBlockMetaStore::async_write_worker() {
375384
}
376385

377386
std::string serialize_key(const BlockMetaKey& key) {
378-
return fmt::format("{}_{}_{}", key.tablet_id, key.hash.to_string(), key.offset);
387+
std::string result;
388+
// Add version byte
389+
result.push_back(0x1);
390+
391+
// Encode tablet_id using KeyCoderTraits
392+
auto* tablet_id_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_BIGINT);
393+
tablet_id_coder->full_encode_ascending(&key.tablet_id, &result);
394+
395+
// Encode hash high and low parts
396+
uint64_t hash_high = key.hash.high();
397+
uint64_t hash_low = key.hash.low();
398+
tablet_id_coder->full_encode_ascending(&hash_high, &result);
399+
tablet_id_coder->full_encode_ascending(&hash_low, &result);
400+
401+
// Encode offset
402+
tablet_id_coder->full_encode_ascending(&key.offset, &result);
403+
404+
return result;
379405
}
380406

381407
std::string serialize_value(const BlockMeta& meta) {
@@ -390,19 +416,39 @@ std::string serialize_value(const BlockMeta& meta) {
390416
}
391417

392418
// Parses a serialized store key back into a BlockMetaKey.
//
// Expected layout: [version byte 0x1][tablet_id][hash high][hash low][offset],
// with every field after the version byte decoded by decode_ascending() of the
// BIGINT key coder, in that order.
//
// Returns a default-constructed BlockMetaKey when the input is empty, the
// version byte is not 0x1 (a warning is logged in that case), or any field
// fails to decode.
BlockMetaKey deserialize_key(const std::string& key_str) {
    Slice remaining(key_str);

    const bool version_ok = remaining.size >= 1 && remaining.data[0] == 0x1;
    if (!version_ok) {
        LOG(WARNING) << "Invalid key, expected prefix 0x1";
        return BlockMetaKey();
    }
    remaining.remove_prefix(1); // drop the version byte

    auto* coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_BIGINT);

    // Decodes the next field from `remaining` into `out`; true on success.
    auto decode_next = [&](size_t width, void* out) {
        Status st = coder->decode_ascending(&remaining, width, static_cast<uint8_t*>(out));
        return st.ok();
    };

    int64_t tablet_id = 0;
    uint64_t upper_half = 0;
    uint64_t lower_half = 0;
    size_t offset = 0;

    // Short-circuit evaluation keeps the decode order and stops at the first failure.
    if (!decode_next(sizeof(int64_t), &tablet_id) ||
        !decode_next(sizeof(uint64_t), &upper_half) ||
        !decode_next(sizeof(uint64_t), &lower_half) ||
        !decode_next(sizeof(size_t), &offset)) {
        return BlockMetaKey();
    }

    const uint128_t hash = (static_cast<uint128_t>(upper_half) << 64) | lower_half;
    return BlockMetaKey(tablet_id, UInt128Wrapper(hash), offset);
}
407453

408454
BlockMeta deserialize_value(const std::string& value_str) {

be/test/io/cache/cache_block_meta_store_test.cpp

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ TEST_F(CacheBlockMetaStoreTest, BasicPutAndGet) {
6161
meta_store_->put(key1, meta1);
6262

6363
// Wait a bit for async operation to complete
64-
std::this_thread::sleep_for(std::chrono::milliseconds(100));
64+
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
6565

6666
// Test get operation
6767
auto result = meta_store_->get(key1);
@@ -300,19 +300,20 @@ TEST_F(CacheBlockMetaStoreTest, IteratorValidity) {
300300
EXPECT_FALSE(iterator->valid());
301301
}
302302

303-
TEST_F(CacheBlockMetaStoreTest, KeyToString) {
303+
// Round-trips a BlockMetaKey through serialize_key()/deserialize_key() and
// checks that every field survives and that the output starts with the
// version byte.
TEST_F(CacheBlockMetaStoreTest, KeySerialization) {
    uint128_t hash_value = (static_cast<uint128_t>(456789) << 64) | 987654;
    BlockMetaKey original(123, UInt128Wrapper(hash_value), 1024);

    std::string encoded = serialize_key(original);
    BlockMetaKey decoded = deserialize_key(encoded);

    // Round trip must preserve all three key fields.
    EXPECT_EQ(decoded.tablet_id, original.tablet_id);
    EXPECT_EQ(decoded.hash, original.hash);
    EXPECT_EQ(decoded.offset, original.offset);

    // The first serialized byte is the version byte.
    EXPECT_EQ(encoded[0], 0x1);
}
317318

318319
TEST_F(CacheBlockMetaStoreTest, BlockMetaEquality) {

0 commit comments

Comments
 (0)