Skip to content

Commit adb3266

Browse files
authored
feat(server): Redesign and simplify tiered storage module. (dragonflydb#589)
1. Allow offloading blobs larger than 2KB. 2. Totally redesign the offloading algorithm for blobs smaller than 2KB. 3. Fix bugs around IO request cancelations. Signed-off-by: Roman Gershman <[email protected]> Signed-off-by: Roman Gershman <[email protected]>
1 parent b5f6629 commit adb3266

File tree

11 files changed

+408
-357
lines changed

11 files changed

+408
-357
lines changed

src/core/compact_object.h

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ class CompactObj {
8989

9090
enum MaskBit {
9191
REF_BIT = 1,
92-
EXPIRE_BIT = 2,
93-
FLAG_BIT = 4,
92+
EXPIRE_BIT = 2, // Mark objects that have expiry timestamp assigned.
93+
FLAG_BIT = 4, // Used to mark keys that have memcache flags assigned.
9494

9595
// ascii encoding is not an injective function. it compresses 8 bytes to 7 but also 7 to 7.
9696
// therefore, in order to know the original length we introduce 2 flags that
@@ -266,14 +266,15 @@ class CompactObj {
266266
bool IsExternal() const {
267267
return taglen_ == EXTERNAL_TAG;
268268
}
269+
269270
void SetExternal(size_t offset, size_t sz);
270271
std::pair<size_t, size_t> GetExternalPtr() const;
271272

272273
// In case this object is a single blob, returns number of bytes allocated on heap
273274
// for that blob. Otherwise returns 0.
274275
size_t MallocUsed() const;
275276

276-
// Resets the object to empty state.
277+
// Resets the object to empty state (string).
277278
void Reset();
278279

279280
bool IsInline() const {
@@ -346,9 +347,9 @@ class CompactObj {
346347
//
347348
static_assert(sizeof(u_) == 16, "");
348349

349-
// Maybe it's possible to merge those 2 together and gain another byte
350-
// but lets postpone it to 2023.
351350
mutable uint8_t mask_ = 0;
351+
352+
// We currently reserve 5 bits for tags; the remaining 3 bits are reserved for extending the mask.
352353
uint8_t taglen_ = 0;
353354
};
354355

@@ -362,4 +363,45 @@ inline bool CompactObj::operator==(std::string_view sv) const {
362363
return EqualNonInline(sv);
363364
}
364365

366+
// Thin wrapper around a CompactObj that is always populated through
// CompactObj::AsRef(). Exposes read-only access, equality and hashing on the
// wrapped object.
// NOTE(review): presumably AsRef() produces a non-owning reference to the
// source object's data, in which case a view must not outlive its source —
// confirm against CompactObj::AsRef().
class CompactObjectView {
367+
public:
368+
// Builds the view from src.AsRef(). Not marked explicit, so a CompactObj
// converts to a view implicitly.
CompactObjectView(const CompactObj& src) : obj_(src.AsRef()) {
369+
}
370+
// Copying goes through AsRef() on the other view's object rather than
// copying obj_ directly.
CompactObjectView(const CompactObjectView& o) : obj_(o.obj_.AsRef()) {
371+
}
372+
// Defaulted move: obj_ is move-constructed from o.obj_.
CompactObjectView(CompactObjectView&& o) = default;
373+
374+
// Implicit conversion back to CompactObj; returns obj_.AsRef().
operator CompactObj() const {
375+
return obj_.AsRef();
376+
}
377+
378+
// Read-only access to the wrapped object's members.
const CompactObj* operator->() const {
379+
return &obj_;
380+
}
381+
382+
// Two views are equal when their wrapped objects compare equal.
bool operator==(const CompactObjectView& o) const {
383+
return obj_ == o.obj_;
384+
}
385+
386+
// Forwards to CompactObj::HashCode() of the wrapped object.
uint64_t Hash() const {
387+
return obj_.HashCode();
388+
}
389+
390+
// Rebinds this view to o's object, again via AsRef().
CompactObjectView& operator=(const CompactObjectView& o) {
391+
obj_ = o.obj_.AsRef();
392+
return *this;
393+
}
394+
395+
// Returns obj_.IsRef().
// NOTE(review): presumably false after Reset() — confirm.
bool defined() const {
396+
return obj_.IsRef();
397+
}
398+
399+
// Calls CompactObj::Reset() on the wrapped object.
void Reset() {
400+
obj_.Reset();
401+
}
402+
403+
private:
404+
CompactObj obj_;  // Only ever assigned from AsRef() results within this class.
405+
};
406+
365407
} // namespace dfly

src/core/generate_bin_sizes.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,12 @@
1010

1111
def print_small_bins():
1212
prev_val = 0
13-
for i in range(64, 1, -1):
14-
val = 4096 // i
15-
val = (val // 16)*16 # make it 16 bytes aligned
16-
if val != prev_val:
17-
print(val, end=', ')
18-
prev_val = val
19-
13+
for i in range(56, 1, -1):
14+
len = (4096 - i*8) # reduce by size of hashes
15+
len = (len // 8)*8 # make it 8 bytes aligned
16+
if len != prev_val:
17+
print(i, len)
18+
prev_val = len
2019
print()
2120

2221

src/server/common.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,13 @@ bool ParseDouble(string_view src, double* value) {
193193
#define ADD(x) (x) += o.x
194194

195195
TieredStats& TieredStats::operator+=(const TieredStats& o) {
196-
static_assert(sizeof(TieredStats) == 32);
196+
static_assert(sizeof(TieredStats) == 40);
197197

198-
ADD(external_reads);
199-
ADD(external_writes);
198+
ADD(tiered_reads);
199+
ADD(tiered_writes);
200200
ADD(storage_capacity);
201201
ADD(storage_reserved);
202+
ADD(aborted_offloads);
202203
return *this;
203204
}
204205

src/server/common.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,14 @@ struct OpArgs {
9191
};
9292

9393
struct TieredStats {
94-
size_t external_reads = 0;
95-
size_t external_writes = 0;
94+
size_t tiered_reads = 0;
95+
size_t tiered_writes = 0;
9696

9797
size_t storage_capacity = 0;
9898

9999
// how much was reserved by actively stored items.
100100
size_t storage_reserved = 0;
101+
size_t aborted_offloads = 0;
101102

102103
TieredStats& operator+=(const TieredStats&);
103104
};

src/server/db_slice.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -703,8 +703,11 @@ void DbSlice::PreUpdate(DbIndex db_ind, PrimeIterator it) {
703703
tiered->Free(offset, size);
704704
it->second.Reset();
705705

706-
stats->external_entries -= 1;
707-
stats->external_size -= size;
706+
stats->tiered_entries -= 1;
707+
stats->tiered_size -= size;
708+
} else if (it->second.HasIoPending()) {
709+
TieredStorage* tiered = shard_owner()->tiered_storage();
710+
tiered->CancelIo(db_ind, it);
708711
}
709712
}
710713

src/server/server_family.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,12 +1313,13 @@ void ServerFamily::Info(CmdArgList args, ConnectionContext* cntx) {
13131313

13141314
if (should_enter("TIERED", true)) {
13151315
ADD_HEADER("# TIERED");
1316-
append("external_entries", total.external_entries);
1317-
append("external_bytes", total.external_size);
1318-
append("external_reads", m.tiered_stats.external_reads);
1319-
append("external_writes", m.tiered_stats.external_writes);
1320-
append("external_reserved", m.tiered_stats.storage_reserved);
1321-
append("external_capacity", m.tiered_stats.storage_capacity);
1316+
append("tiered_entries", total.tiered_entries);
1317+
append("tiered_bytes", total.tiered_size);
1318+
append("tiered_reads", m.tiered_stats.tiered_reads);
1319+
append("tiered_writes", m.tiered_stats.tiered_writes);
1320+
append("tiered_reserved", m.tiered_stats.storage_reserved);
1321+
append("tiered_capacity", m.tiered_stats.storage_capacity);
1322+
append("tiered_aborted_writes", m.tiered_stats.aborted_offloads);
13221323
}
13231324

13241325
if (should_enter("PERSISTENCE", true)) {

src/server/string_family.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ OpStatus SetCmd::Set(const SetParams& params, string_view key, string_view value
413413
TieredStorage::EligibleForOffload(value)) { // external storage enabled.
414414
// TODO: we may have a bug if we block the fiber inside ScheduleOffload - "it" may be invalid
415415
// afterwards.
416-
shard->tiered_storage()->UnloadItem(op_args_.db_cntx.db_index, it);
416+
shard->tiered_storage()->ScheduleOffload(op_args_.db_cntx.db_index, it);
417417
}
418418

419419
return OpStatus::OK;
@@ -458,14 +458,14 @@ OpStatus SetCmd::SetExisting(const SetParams& params, PrimeIterator it, ExpireIt
458458

459459
// overwrite existing entry.
460460
prime_value.SetString(value);
461+
DCHECK(!prime_value.HasIoPending());
461462

462463
if (value.size() >= kMinTieredLen) { // external storage enabled.
463-
464464
// TODO: if ScheduleOffload can block the calling fiber, then we have a bug because "it"
465465
// can be invalid after the function returns and the functions that follow may access invalid
466466
// entry.
467467
if (shard->tiered_storage()) {
468-
shard->tiered_storage()->UnloadItem(op_args_.db_cntx.db_index, it);
468+
shard->tiered_storage()->ScheduleOffload(op_args_.db_cntx.db_index, it);
469469
}
470470
}
471471

src/server/table.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,15 @@ DbTableStats& DbTableStats::operator+=(const DbTableStats& o) {
2323
ADD(update_value_amount);
2424
ADD(listpack_blob_cnt);
2525
ADD(listpack_bytes);
26-
ADD(external_entries);
27-
ADD(external_size);
26+
ADD(tiered_entries);
27+
ADD(tiered_size);
2828

2929
return *this;
3030
}
3131

3232
DbTable::DbTable(std::pmr::memory_resource* mr)
3333
: prime(kInitSegmentLog, detail::PrimeTablePolicy{}, mr),
34-
expire(0, detail::ExpireTablePolicy{}, mr),
35-
mcflag(0, detail::ExpireTablePolicy{}, mr) {
34+
expire(0, detail::ExpireTablePolicy{}, mr), mcflag(0, detail::ExpireTablePolicy{}, mr) {
3635
}
3736

3837
DbTable::~DbTable() {

src/server/table.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ struct DbTableStats {
4848
ssize_t update_value_amount = 0;
4949
size_t listpack_blob_cnt = 0;
5050
size_t listpack_bytes = 0;
51-
size_t external_entries = 0;
52-
size_t external_size = 0;
51+
size_t tiered_entries = 0;
52+
size_t tiered_size = 0;
5353

5454
DbTableStats& operator+=(const DbTableStats& o);
5555
};

0 commit comments

Comments
 (0)