Skip to content

Commit 89f42f1

Browse files
authored
Merge pull request ceph#60278 from rzarzynski/wip-os-fastomapiter
os, osd: bring the lightweight OMAP iteration Reviewed-by: Casey Bodley <[email protected]> Reviewed-by: Matan Breizman <[email protected]> Reviewed-by: Mark Kogan <[email protected]> Reviewed-by: Adam Kupczyk <[email protected]> Reviewed-by: Samuel Just <[email protected]>
2 parents 1e49516 + cbc771a commit 89f42f1

File tree

17 files changed

+502
-123
lines changed

17 files changed

+502
-123
lines changed

src/common/ceph_time.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,23 @@ class coarse_mono_clock {
342342
}
343343
};
344344

345+
// Please note time_guard is not thread safety -- multiple threads
346+
// updating same diff_accumulator can corrupt it.
347+
template <class ClockT = mono_clock>
348+
class time_guard {
349+
const typename ClockT::time_point start;
350+
timespan& diff_accumulator;
351+
352+
public:
353+
time_guard(timespan& diff_accumulator)
354+
: start(ClockT::now()),
355+
diff_accumulator(diff_accumulator) {
356+
}
357+
~time_guard() {
358+
diff_accumulator += ClockT::now() - start;
359+
}
360+
};
361+
345362
namespace time_detail {
346363
// So that our subtractions produce negative spans rather than
347364
// arithmetic underflow.

src/crimson/os/alienstore/alien_store.cc

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,21 @@ auto AlienStore::omap_get_values(CollectionRef ch,
435435
return do_with_op_gate(omap_values_t{}, [=, this] (auto &values) {
436436
return tp->submit(ch->get_cid().hash_to_shard(tp->size()), [=, this, &values] {
437437
auto c = static_cast<AlienCollection*>(ch.get());
438-
return store->omap_get_values(c->collection, oid, start,
439-
reinterpret_cast<map<string, bufferlist>*>(&values));
438+
return store->omap_iterate(
439+
c->collection, oid,
440+
ObjectStore::omap_iter_seek_t{
441+
.seek_position = start.value_or(std::string{}),
442+
// FIXME: classical OSDs begin iteration from LOWER_BOUND
443+
// (or UPPER_BOUND if filter_prefix > start). However, these
444+
// bits are not implemented yet
445+
.seek_type = ObjectStore::omap_iter_seek_t::UPPER_BOUND
446+
},
447+
[&values]
448+
(std::string_view key, std::string_view value) mutable {
449+
values[std::string{key}].append(value);
450+
// FIXME: there is no limit on the number of entries yet
451+
return ObjectStore::omap_iter_ret_t::NEXT;
452+
});
440453
}).then([&values] (int r)
441454
-> read_errorator::future<std::tuple<bool, omap_values_t>> {
442455
if (r == -ENOENT) {

src/kv/KeyValueDB.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <map>
1010
#include <optional>
1111
#include <string>
12+
#include <string_view>
1213
#include <boost/scoped_ptr.hpp>
1314
#include "include/encoding.h"
1415
#include "common/Formatter.h"
@@ -211,6 +212,10 @@ class KeyValueDB {
211212
return "";
212213
}
213214
virtual ceph::buffer::list value() = 0;
215+
// When valid() returns true, value returned as string-view
216+
// is guaranteed to be valid until iterator is moved to another
217+
// position; that is until call to next() / seek_to_first() / etc.
218+
virtual std::string_view value_as_sv() = 0;
214219
virtual int status() = 0;
215220
virtual ~SimplestIteratorImpl() {}
216221
};
@@ -220,7 +225,12 @@ class KeyValueDB {
220225
virtual ~IteratorImpl() {}
221226
virtual int seek_to_last() = 0;
222227
virtual int prev() = 0;
228+
// When valid() returns true, key returned as string-view
229+
// is guaranteed to be valid until iterator is moved to another
230+
// position; that is until call to next() / seek_to_first() / etc.
231+
virtual std::string_view key_as_sv() = 0;
223232
virtual std::pair<std::string, std::string> raw_key() = 0;
233+
virtual std::pair<std::string_view, std::string_view> raw_key_as_sv() = 0;
224234
virtual ceph::buffer::ptr value_as_ptr() {
225235
ceph::buffer::list bl = value();
226236
if (bl.length() == 1) {
@@ -247,7 +257,9 @@ class KeyValueDB {
247257
virtual int next() = 0;
248258
virtual int prev() = 0;
249259
virtual std::string key() = 0;
260+
virtual std::string_view key_as_sv() = 0;
250261
virtual std::pair<std::string,std::string> raw_key() = 0;
262+
virtual std::pair<std::string_view, std::string_view> raw_key_as_sv() = 0;
251263
virtual bool raw_key_is_prefixed(const std::string &prefix) = 0;
252264
virtual ceph::buffer::list value() = 0;
253265
virtual ceph::buffer::ptr value_as_ptr() {
@@ -258,6 +270,7 @@ class KeyValueDB {
258270
return ceph::buffer::ptr();
259271
}
260272
}
273+
virtual std::string_view value_as_sv() = 0;
261274
virtual int status() = 0;
262275
virtual size_t key_size() {
263276
return 0;
@@ -315,15 +328,24 @@ class KeyValueDB {
315328
std::string key() override {
316329
return generic_iter->key();
317330
}
331+
std::string_view key_as_sv() override {
332+
return generic_iter->key_as_sv();
333+
}
318334
std::pair<std::string, std::string> raw_key() override {
319335
return generic_iter->raw_key();
320336
}
337+
std::pair<std::string_view, std::string_view> raw_key_as_sv() override {
338+
return generic_iter->raw_key_as_sv();
339+
}
321340
ceph::buffer::list value() override {
322341
return generic_iter->value();
323342
}
324343
ceph::buffer::ptr value_as_ptr() override {
325344
return generic_iter->value_as_ptr();
326345
}
346+
std::string_view value_as_sv() override {
347+
return generic_iter->value_as_sv();
348+
}
327349
int status() override {
328350
return generic_iter->status();
329351
}

src/kv/RocksDBStore.cc

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <memory>
77
#include <set>
88
#include <string>
9+
#include <string_view>
910
#include <errno.h>
1011
#include <unistd.h>
1112
#include <sys/types.h>
@@ -47,6 +48,7 @@ using std::ostream;
4748
using std::pair;
4849
using std::set;
4950
using std::string;
51+
using std::string_view;
5052
using std::unique_ptr;
5153
using std::vector;
5254

@@ -1992,7 +1994,7 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key)
19921994

19931995
// Find separator inside Slice
19941996
char* separator = (char*) memchr(in.data(), 0, in.size());
1995-
if (separator == NULL)
1997+
if (separator == nullptr)
19961998
return -EINVAL;
19971999
prefix_len = size_t(separator - in.data());
19982000
if (prefix_len >= in.size())
@@ -2006,6 +2008,27 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key)
20062008
return 0;
20072009
}
20082010

2011+
// TODO: deduplicate the code, preferrably by removing the string variant
2012+
int RocksDBStore::split_key(rocksdb::Slice in, string_view *prefix, string_view *key)
2013+
{
2014+
size_t prefix_len = 0;
2015+
2016+
// Find separator inside Slice
2017+
char* separator = (char*) memchr(in.data(), 0, in.size());
2018+
if (separator == nullptr)
2019+
return -EINVAL;
2020+
prefix_len = size_t(separator - in.data());
2021+
if (prefix_len >= in.size())
2022+
return -EINVAL;
2023+
2024+
// Fetch prefix and/or key directly from Slice
2025+
if (prefix)
2026+
*prefix = string_view(in.data(), prefix_len);
2027+
if (key)
2028+
*key = string_view(separator + 1, in.size() - prefix_len - 1);
2029+
return 0;
2030+
}
2031+
20092032
void RocksDBStore::compact()
20102033
{
20112034
dout(2) << __func__ << " starting" << dendl;
@@ -2226,7 +2249,13 @@ int RocksDBStore::RocksDBWholeSpaceIteratorImpl::prev()
22262249
string RocksDBStore::RocksDBWholeSpaceIteratorImpl::key()
22272250
{
22282251
string out_key;
2229-
split_key(dbiter->key(), 0, &out_key);
2252+
split_key(dbiter->key(), nullptr, &out_key);
2253+
return out_key;
2254+
}
2255+
string_view RocksDBStore::RocksDBWholeSpaceIteratorImpl::key_as_sv()
2256+
{
2257+
string_view out_key;
2258+
split_key(dbiter->key(), nullptr, &out_key);
22302259
return out_key;
22312260
}
22322261
pair<string,string> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key()
@@ -2235,6 +2264,12 @@ pair<string,string> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key()
22352264
split_key(dbiter->key(), &prefix, &key);
22362265
return make_pair(prefix, key);
22372266
}
2267+
// Splits the iterator's current raw key into its {prefix, key} parts and
// returns them as views. The status of split_key() is not checked; on
// failure both parts are left empty.
pair<string_view,string_view> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key_as_sv()
{
  pair<string_view,string_view> parts;
  split_key(dbiter->key(), &parts.first, &parts.second);
  return parts;
}
22382273

22392274
bool RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key_is_prefixed(const string &prefix) {
22402275
// Look for "prefix\0" right in rocksdb::Slice
@@ -2267,6 +2302,12 @@ bufferptr RocksDBStore::RocksDBWholeSpaceIteratorImpl::value_as_ptr()
22672302
return bufferptr(val.data(), val.size());
22682303
}
22692304

2305+
std::string_view RocksDBStore::RocksDBWholeSpaceIteratorImpl::value_as_sv()
2306+
{
2307+
rocksdb::Slice val = dbiter->value();
2308+
return std::string_view{val.data(), val.size()};
2309+
}
2310+
22702311
int RocksDBStore::RocksDBWholeSpaceIteratorImpl::status()
22712312
{
22722313
return dbiter->status().ok() ? 0 : -1;
@@ -2348,16 +2389,26 @@ class CFIteratorImpl : public KeyValueDB::IteratorImpl {
23482389
string key() override {
23492390
return dbiter->key().ToString();
23502391
}
2392+
string_view key_as_sv() override {
2393+
return dbiter->key().ToStringView();
2394+
}
23512395
std::pair<std::string, std::string> raw_key() override {
23522396
return make_pair(prefix, key());
23532397
}
2398+
std::pair<std::string_view, std::string_view> raw_key_as_sv() override {
2399+
return make_pair(prefix, dbiter->key().ToStringView());
2400+
}
23542401
bufferlist value() override {
23552402
return to_bufferlist(dbiter->value());
23562403
}
23572404
bufferptr value_as_ptr() override {
23582405
rocksdb::Slice val = dbiter->value();
23592406
return bufferptr(val.data(), val.size());
23602407
}
2408+
std::string_view value_as_sv() override {
2409+
rocksdb::Slice val = dbiter->value();
2410+
return std::string_view{val.data(), val.size()};
2411+
}
23612412
int status() override {
23622413
return dbiter->status().ok() ? 0 : -1;
23632414
}
@@ -2668,6 +2719,15 @@ class WholeMergeIteratorImpl : public KeyValueDB::WholeSpaceIteratorImpl {
26682719
}
26692720
}
26702721

2722+
std::string_view key_as_sv() override
2723+
{
2724+
if (smaller == on_main) {
2725+
return main->key_as_sv();
2726+
} else {
2727+
return current_shard->second->key_as_sv();
2728+
}
2729+
}
2730+
26712731
std::pair<std::string,std::string> raw_key() override
26722732
{
26732733
if (smaller == on_main) {
@@ -2677,6 +2737,15 @@ class WholeMergeIteratorImpl : public KeyValueDB::WholeSpaceIteratorImpl {
26772737
}
26782738
}
26792739

2740+
std::pair<std::string_view,std::string_view> raw_key_as_sv() override
2741+
{
2742+
if (smaller == on_main) {
2743+
return main->raw_key_as_sv();
2744+
} else {
2745+
return { current_shard->first, current_shard->second->key_as_sv() };
2746+
}
2747+
}
2748+
26802749
bool raw_key_is_prefixed(const std::string &prefix) override
26812750
{
26822751
if (smaller == on_main) {
@@ -2695,6 +2764,15 @@ class WholeMergeIteratorImpl : public KeyValueDB::WholeSpaceIteratorImpl {
26952764
}
26962765
}
26972766

2767+
std::string_view value_as_sv() override
2768+
{
2769+
if (smaller == on_main) {
2770+
return main->value_as_sv();
2771+
} else {
2772+
return current_shard->second->value_as_sv();
2773+
}
2774+
}
2775+
26982776
int status() override
26992777
{
27002778
//because we already had to inspect key, it must be ok
@@ -3017,16 +3095,26 @@ class ShardMergeIteratorImpl : public KeyValueDB::IteratorImpl {
30173095
string key() override {
30183096
return iters[0]->key().ToString();
30193097
}
3098+
string_view key_as_sv() override {
3099+
return iters[0]->key().ToStringView();
3100+
}
30203101
std::pair<std::string, std::string> raw_key() override {
30213102
return make_pair(prefix, key());
30223103
}
3104+
std::pair<std::string_view, std::string_view> raw_key_as_sv() override {
3105+
return make_pair(prefix, iters[0]->key().ToStringView());
3106+
}
30233107
bufferlist value() override {
30243108
return to_bufferlist(iters[0]->value());
30253109
}
30263110
bufferptr value_as_ptr() override {
30273111
rocksdb::Slice val = iters[0]->value();
30283112
return bufferptr(val.data(), val.size());
30293113
}
3114+
std::string_view value_as_sv() override {
3115+
rocksdb::Slice val = iters[0]->value();
3116+
return std::string_view{val.data(), val.size()};
3117+
}
30303118
int status() override {
30313119
return iters[0]->status().ok() ? 0 : -1;
30323120
}

src/kv/RocksDBStore.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,13 @@ class RocksDBStore : public KeyValueDB {
386386
int next() override;
387387
int prev() override;
388388
std::string key() override;
389+
std::string_view key_as_sv() override;
389390
std::pair<std::string,std::string> raw_key() override;
391+
std::pair<std::string_view,std::string_view> raw_key_as_sv() override;
390392
bool raw_key_is_prefixed(const std::string &prefix) override;
391393
ceph::bufferlist value() override;
392394
ceph::bufferptr value_as_ptr() override;
395+
std::string_view value_as_sv() override;
393396
int status() override;
394397
size_t key_size() override;
395398
size_t value_size() override;
@@ -419,6 +422,7 @@ class RocksDBStore : public KeyValueDB {
419422
}
420423

421424
static int split_key(rocksdb::Slice in, std::string *prefix, std::string *key);
425+
static int split_key(rocksdb::Slice in, std::string_view *prefix, std::string_view *key);
422426

423427
static std::string past_prefix(const std::string &prefix);
424428

src/os/DBObjectMap.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,11 @@ bufferlist DBObjectMap::DBObjectMapIteratorImpl::value()
519519
return cur_iter->value();
520520
}
521521

522+
std::string_view DBObjectMap::DBObjectMapIteratorImpl::value_as_sv()
523+
{
524+
return cur_iter->value_as_sv();
525+
}
526+
522527
int DBObjectMap::DBObjectMapIteratorImpl::status()
523528
{
524529
return r;

src/os/DBObjectMap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ class DBObjectMap : public ObjectMap {
393393
int next() override { ceph_abort(); return 0; }
394394
std::string key() override { ceph_abort(); return ""; }
395395
ceph::buffer::list value() override { ceph_abort(); return ceph::buffer::list(); }
396+
std::string_view value_as_sv() override { ceph_abort(); return std::string_view(); }
396397
int status() override { return 0; }
397398
};
398399

@@ -431,6 +432,7 @@ class DBObjectMap : public ObjectMap {
431432
int next() override;
432433
std::string key() override;
433434
ceph::buffer::list value() override;
435+
std::string_view value_as_sv() override;
434436
int status() override;
435437

436438
bool on_parent() {

0 commit comments

Comments
 (0)