Skip to content
8 changes: 7 additions & 1 deletion src/core/compact_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1224,11 +1224,13 @@ CompactObj::ExternalRep CompactObj::GetExternalRep() const {
return static_cast<CompactObj::ExternalRep>(u_.ext_ptr.representation);
}

void CompactObj::SetCool(size_t offset, uint32_t sz, detail::TieredColdRecord* record) {
void CompactObj::SetCool(size_t offset, uint32_t sz, ExternalRep rep,
detail::TieredColdRecord* record) {
// We copy the mask of the "cooled" referenced object because it contains the encoding info.
SetMeta(EXTERNAL_TAG, record->value.mask_);

u_.ext_ptr.is_cool = 1;
u_.ext_ptr.representation = static_cast<uint8_t>(rep);
u_.ext_ptr.page_offset = offset % 4096;
u_.ext_ptr.serialized_size = sz;
u_.ext_ptr.cool_record = record;
Expand All @@ -1244,6 +1246,10 @@ auto CompactObj::GetCool() const -> CoolItem {
return res;
}

// Switches the object to the external slice (offset, sz) via SetExternal(),
// passing along the representation this object currently reports.
void CompactObj::Freeze(size_t offset, size_t sz) {
  // Read the representation first, then hand everything to SetExternal().
  const auto current_rep = GetExternalRep();
  SetExternal(offset, sz, current_rep);
}

std::pair<size_t, size_t> CompactObj::GetExternalSlice() const {
DCHECK_EQ(EXTERNAL_TAG, taglen_);
auto& ext = u_.ext_ptr;
Expand Down
7 changes: 6 additions & 1 deletion src/core/compact_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,8 @@ class CompactObj {
}

// Assigns a cooling record to the object together with its external slice.
void SetCool(size_t offset, uint32_t serialized_size, detail::TieredColdRecord* record);
void SetCool(size_t offset, uint32_t serialized_size, ExternalRep rep,
detail::TieredColdRecord* record);

struct CoolItem {
uint16_t page_offset;
Expand All @@ -376,6 +377,10 @@ class CompactObj {
// Returns the external data of the object including its ColdRecord.
CoolItem GetCool() const;

// Prerequisite: IsCool() is true.
// Keeps the cool record only as an external value and discards the in-memory part.
void Freeze(size_t offset, size_t sz);

std::pair<size_t, size_t> GetExternalSlice() const;

// Injects either the raw string (extracted with GetRawString()) or the usual string
Expand Down
12 changes: 6 additions & 6 deletions src/core/detail/listpack_wrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ void ListpackWrap::Iterator::Read() {
next_ptr_ = lpNext(lp_, next_ptr_);
}

ListpackWrap::~ListpackWrap() {
DCHECK(!dirty_);
// Builds a wrapper around a listpack freshly created by lpNew(); `capacity`
// is forwarded to lpNew() unchanged.
ListpackWrap ListpackWrap::WithCapacity(size_t capacity) {
  auto* fresh_lp = lpNew(capacity);
  return ListpackWrap{fresh_lp};
}

uint8_t* ListpackWrap::GetPointer() {
dirty_ = false;
return lp_;
}

Expand All @@ -59,7 +58,6 @@ bool ListpackWrap::Delete(std::string_view key) {
return false;

lp_ = lpDeleteRangeWithEntry(lp_, &ptr, 2);
dirty_ = true;
return true;
}

Expand All @@ -86,7 +84,6 @@ bool ListpackWrap::Insert(std::string_view key, std::string_view value, bool ski
lp_ = lpReplace(lp_, &vptr, vsrc, value.size());
DCHECK_EQ(0u, lpLength(lp_) % 2);

dirty_ = true;
updated = true;
}
}
Expand All @@ -96,7 +93,6 @@ bool ListpackWrap::Insert(std::string_view key, std::string_view value, bool ski
// TODO: we should at least allocate once for both elements
lp_ = lpAppend(lp_, fsrc, key.size());
lp_ = lpAppend(lp_, vsrc, value.size());
dirty_ = true;
}

return !updated;
Expand All @@ -106,6 +102,10 @@ size_t ListpackWrap::size() const {
return lpLength(lp_) / 2;
}

// Reports the byte count that lpBytes() returns for the wrapped listpack.
size_t ListpackWrap::DataBytes() const {
  const size_t byte_count = lpBytes(lp_);
  return byte_count;
}

// Returns an iterator positioned at the entry lpFirst() reports for the
// wrapped listpack; the iterator also receives the integer scratch buffer.
ListpackWrap::Iterator ListpackWrap::begin() const {
  auto* first_entry = lpFirst(lp_);
  return Iterator{lp_, first_entry, intbuf_};
}
Expand Down
8 changes: 5 additions & 3 deletions src/core/detail/listpack_wrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ struct ListpackWrap {
using IntBuf = uint8_t[2][24];

public:
~ListpackWrap();

struct Iterator {
using iterator_category = std::forward_iterator_tag;
using difference_type = std::ptrdiff_t;
Expand Down Expand Up @@ -48,6 +46,9 @@ struct ListpackWrap {
// Wraps an existing listpack: the pointer is stored as-is, nothing is copied.
// NOTE(review): no destructor is visible in this view, so ownership of `lp`
// presumably stays with the caller — confirm.
explicit ListpackWrap(uint8_t* lp) : lp_{lp} {
}

// Create listpack with capacity
static ListpackWrap WithCapacity(size_t capacity);

uint8_t* GetPointer(); // Get new updated pointer
Iterator Find(std::string_view key) const; // Linear search
bool Delete(std::string_view key);
Expand All @@ -57,13 +58,14 @@ struct ListpackWrap {
Iterator end() const;
size_t size() const; // number of entries

size_t DataBytes() const;

// Get view from raw listpack iterator
static std::string_view GetView(uint8_t* lp_it, uint8_t int_buf[]);

private:
uint8_t* lp_; // the listpack itself
mutable IntBuf intbuf_; // buffer for integers decoded to strings
bool dirty_ = false; // whether lp_ was updated, but never retrieved with GetPointer
};

} // namespace dfly::detail
4 changes: 3 additions & 1 deletion src/server/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ bool ParseDouble(string_view src, double* value) {
#define ADD(x) (x) += o.x

TieredStats& TieredStats::operator+=(const TieredStats& o) {
static_assert(sizeof(TieredStats) == 160);
static_assert(sizeof(TieredStats) == 168);

ADD(total_stashes);
ADD(total_fetches);
Expand All @@ -182,6 +182,8 @@ TieredStats& TieredStats::operator+=(const TieredStats& o) {
ADD(small_bins_cnt);
ADD(small_bins_entries_cnt);
ADD(small_bins_filling_bytes);
ADD(small_bins_filling_entries_cnt);

ADD(total_stash_overflows);
ADD(cold_storage_bytes);
ADD(total_offloading_steps);
Expand Down
1 change: 1 addition & 0 deletions src/server/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ struct TieredStats {
uint64_t small_bins_cnt = 0;
uint64_t small_bins_entries_cnt = 0;
size_t small_bins_filling_bytes = 0;
size_t small_bins_filling_entries_cnt = 0;
size_t cold_storage_bytes = 0;

uint64_t clients_throttled = 0; // current number of throttled clients
Expand Down
108 changes: 86 additions & 22 deletions src/server/hset_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ struct HMapWrap {
}

public:
// Create from an in-memory prime value: either non-external or cool (see the DCHECK below)
HMapWrap(const PrimeValue& pv, DbContext db_cntx) {
DCHECK(!pv.IsExternal() || pv.IsCool());
if (pv.Encoding() == kEncodingListPack)
Expand All @@ -83,6 +84,9 @@ struct HMapWrap {
impl_ = GetStringMap(pv, db_cntx);
}

// Create from a listpack wrapper taken by value; it is moved into impl_.
explicit HMapWrap(detail::ListpackWrap lw) : impl_{std::move(lw)} {
}

// Create from a serialized map; only the pointer is stored in impl_.
// NOTE(review): presumably the pointee must outlive this wrapper — confirm.
explicit HMapWrap(tiering::SerializedMap* sm) : impl_{sm} {
}

Expand Down Expand Up @@ -193,7 +197,12 @@ OpResult<T> ExecuteRO(Transaction* tx, F&& f) {
using D = tiering::SerializedMapDecoder;
util::fb2::Future<OpResult<T>> fut;
auto read_cb = [fut, f = std::move(f)](io::Result<D*> res) mutable {
HMapWrap hw{res.value()->Get()};
// Create wrapper from different types
Overloaded ov{
[](tiering::SerializedMap* sm) { return HMapWrap{sm}; },
[](detail::ListpackWrap* lw) { return HMapWrap{*lw}; },
};
auto hw = visit(ov, res.value()->Read());
fut.Resolve(f(hw));
};

Expand All @@ -216,15 +225,34 @@ OpResult<T> ExecuteRO(Transaction* tx, F&& f) {
}

// Wrap write handler
template <typename F> auto WrapW(F&& f) {
using RT = std::invoke_result_t<F, HMapWrap&>;
return [f = std::forward<F>(f)](Transaction* t, EngineShard* es) -> RT {
template <typename F> auto ExecuteW(Transaction* tx, F&& f) {
using T = typename std::invoke_result_t<F, HMapWrap&>::Type;
auto shard_cb = [f = std::forward<F>(f)](Transaction* t,
EngineShard* es) -> OpResult<CbVariant<T>> {
// Fetch value of hash type
auto [key, op_args] = KeyAndArgs(t, es);

auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_HASH);
RETURN_ON_BAD_STATUS(it_res);
auto& pv = it_res->it->second;

// Enqueue read for future values
if (pv.IsExternal() && !pv.IsCool()) {
using D = tiering::SerializedMapDecoder;
util::fb2::Future<OpResult<T>> fut;
auto read_cb = [fut, f = std::move(f)](io::Result<D*> res) mutable {
// Create wrapper from different types
HMapWrap hw{*res.value()->Write()};
fut.Resolve(f(hw));

// soak listpack wrapper back to get updated value
*res.value()->Write() = *hw.Get<detail::ListpackWrap>();
};

es->tiered_storage()->Read(op_args.db_cntx.db_index, key, pv, D{}, std::move(read_cb));
return CbVariant<T>{std::move(fut)};
}

// Remove document before modification
op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, pv);

Expand All @@ -240,8 +268,11 @@ template <typename F> auto WrapW(F&& f) {
else
op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, pv);

return res;
RETURN_ON_BAD_STATUS(res);
return CbVariant<T>{std::move(res).value()};
};

return Unwrap(tx->ScheduleSingleHopT(std::move(shard_cb)));
}

size_t EstimateListpackMinBytes(CmdArgList members) {
Expand Down Expand Up @@ -391,28 +422,27 @@ OpResult<vector<OptStr>> OpHMGet(const HMapWrap& hw, CmdArgList fields) {
DCHECK(!fields.empty());

std::vector<OptStr> result(fields.size());
if (auto lw = hw.Get<detail::ListpackWrap>(); lw) {
if (auto sm = hw.Get<StringMap*>(); sm) {
for (size_t i = 0; i < fields.size(); ++i) {
if (auto it = (*sm)->Find(fields[i]); it != (*sm)->end()) {
result[i].emplace(it->second, sdslen(it->second));
}
}
} else {
absl::flat_hash_map<string_view, absl::InlinedVector<size_t, 3>> reverse;
reverse.reserve(fields.size() + 1);
for (size_t i = 0; i < fields.size(); ++i) {
reverse[ArgS(fields, i)].push_back(i); // map fields to their index.
}

for (const auto [key, value] : *lw) {
for (const auto [key, value] : hw.Range()) {
if (auto it = reverse.find(key); it != reverse.end()) {
for (size_t index : it->second) {
DCHECK_LT(index, result.size());
result[index].emplace(value);
}
}
}
} else {
StringMap* sm = *hw.Get<StringMap*>();
for (size_t i = 0; i < fields.size(); ++i) {
if (auto it = sm->Find(fields[i]); it != sm->end()) {
result[i].emplace(it->second, sdslen(it->second));
}
}
}

return result;
Expand All @@ -424,8 +454,9 @@ struct OpSetParams {
bool keepttl = false;
};

OpResult<uint32_t> OpSet(const OpArgs& op_args, string_view key, CmdArgList values,
const OpSetParams& op_sp = OpSetParams{}) {
OpResult<CbVariant<uint32_t>> OpSet(const OpArgs& op_args, string_view key, CmdArgList values,
const OpSetParams& op_sp = OpSetParams{},
optional<util::fb2::Future<bool>>* bp_anker = nullptr) {
DCHECK(!values.empty() && 0 == values.size() % 2);
VLOG(2) << "OpSet(" << key << ")";

Expand All @@ -438,6 +469,26 @@ OpResult<uint32_t> OpSet(const OpArgs& op_args, string_view key, CmdArgList valu
auto& it = add_res.it;
PrimeValue& pv = it->second;

// If the value is external (and not cool), enqueue a read and apply the insertions
// to the object returned by Write() inside the read callback.
if (pv.IsExternal() && !pv.IsCool()) {
  CHECK(op_sp.ttl == UINT32_MAX);  // TODO: remove
  using D = tiering::SerializedMapDecoder;
  util::fb2::Future<OpResult<uint32_t>> fut;

  // The callback is handed to tiered storage and the still-unresolved future is
  // returned to the caller, so the callback may run after this function has
  // returned. Copy the flag out of op_sp instead of capturing op_sp by reference:
  // op_sp may be bound to a temporary (e.g. the default argument) that no longer
  // exists by then.
  // NOTE(review): `values` is copied as a non-owning view; this assumes the caller
  // keeps the underlying arguments alive until the future resolves — confirm.
  auto read_cb = [fut, values, skip_if_exists = op_sp.skip_if_exists](
                     io::Result<D*> res) mutable {
    // Insert the field/value pairs directly into the writable decoded value
    auto& lw = *res.value()->Write();
    uint32_t created = 0;
    for (size_t i = 0; i < values.size(); i += 2) {
      created += lw.Insert(values[i], values[i + 1], skip_if_exists);
    }
    fut.Resolve(created);
  };

  op_args.shard->tiered_storage()->Read(op_args.db_cntx.db_index, key, pv, D{},
                                        std::move(read_cb));
  return CbVariant<uint32_t>{std::move(fut)};
}

if (add_res.is_new) {
if (op_sp.ttl == UINT32_MAX) {
lp = lpNew(0);
Expand Down Expand Up @@ -492,7 +543,13 @@ OpResult<uint32_t> OpSet(const OpArgs& op_args, string_view key, CmdArgList valu

op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, pv);

return created;
if (auto* ts = op_args.shard->tiered_storage(); ts) {
auto bp = ts->TryStash(op_args.db_cntx.db_index, key, &pv, true);
if (bp && bp_anker)
*bp_anker = std::move(*bp);
}

return CbVariant<uint32_t>{created};
}

void HGetGeneric(CmdArgList args, uint8_t getall_mask, Transaction* tx, SinkReplyBuilder* builder) {
Expand Down Expand Up @@ -584,7 +641,8 @@ void HSetEx(CmdArgList args, const CommandContext& cmd_cntx) {
return OpSet(t->GetOpArgs(shard), key, fields, op_sp);
};

OpResult<uint32_t> result = cmd_cntx.tx->ScheduleSingleHopT(std::move(cb));
auto delayed_result = cmd_cntx.tx->ScheduleSingleHopT(std::move(cb));
OpResult<uint32_t> result = Unwrap(std::move(delayed_result));
if (result) {
cmd_cntx.rb->SendLong(*result);
} else {
Expand Down Expand Up @@ -615,7 +673,7 @@ void HSetFamily::HDel(CmdArgList args, const CommandContext& cmd_cntx) {
deleted += hw.Erase(s);
return deleted;
};
HSetReplies{cmd_cntx.rb}.Send(cmd_cntx.tx->ScheduleSingleHopT(WrapW(cb)));
HSetReplies{cmd_cntx.rb}.Send(ExecuteW(cmd_cntx.tx, std::move(cb)));
}

void HSetFamily::HExpire(CmdArgList args, const CommandContext& cmd_cntx) {
Expand Down Expand Up @@ -853,12 +911,18 @@ void HSetFamily::HSet(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(facade::WrongNumArgsError(cmd), kSyntaxErrType);
}

optional<util::fb2::Future<bool>> tiered_backpressure;

args.remove_prefix(1);
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpSet(t->GetOpArgs(shard), key, args);
return OpSet(t->GetOpArgs(shard), key, args, OpSetParams{}, &tiered_backpressure);
};

OpResult<uint32_t> result = cmd_cntx.tx->ScheduleSingleHopT(std::move(cb));
auto delayed_result = cmd_cntx.tx->ScheduleSingleHopT(std::move(cb));
OpResult<uint32_t> result = Unwrap(std::move(delayed_result));

if (tiered_backpressure)
tiered_backpressure->GetFor(10ms);

if (result && cmd == "HSET") {
cmd_cntx.rb->SendLong(*result);
Expand All @@ -873,7 +937,7 @@ void HSetFamily::HSetNx(CmdArgList args, const CommandContext& cmd_cntx) {
auto cb = [&](Transaction* t, EngineShard* shard) {
return OpSet(t->GetOpArgs(shard), key, args.subspan(1), OpSetParams{.skip_if_exists = true});
};
HSetReplies{cmd_cntx.rb}.Send(cmd_cntx.tx->ScheduleSingleHopT(cb));
HSetReplies{cmd_cntx.rb}.Send(Unwrap(cmd_cntx.tx->ScheduleSingleHopT(cb)));
}

void StrVecEmplaceBack(StringVec& str_vec, const listpackEntry& lp) {
Expand Down
Loading
Loading