Skip to content

Commit 3db0e22

Browse files
committed
os/bluestore: move and rename ExtentCache to Allocator class.
Signed-off-by: Igor Fedotov <[email protected]>
1 parent 54b6d24 commit 3db0e22

File tree

3 files changed

+193
-141
lines changed

3 files changed

+193
-141
lines changed

src/os/bluestore/Allocator.h

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,185 @@
1616
#include <ostream>
1717
#include "include/ceph_assert.h"
1818
#include "bluestore_types.h"
19+
#include "common/ceph_mutex.h"
1920

2021
typedef interval_set<uint64_t> release_set_t;
2122
typedef release_set_t::value_type release_set_entry_t;
2223

2324
class Allocator {
25+
protected:
26+
27+
struct ExtentCollectionTraits {
28+
size_t num_buckets;
29+
size_t base_bits; // min extent size
30+
size_t base = 1ull << base_bits;
31+
size_t factor; // single bucket size range to be
32+
// determined as [len, len * factor * 2)
33+
// for log2(len) indexing and
34+
// [len, len + factor * base)
35+
// for linear indexing.
36+
37+
38+
ExtentCollectionTraits(size_t _num_buckets,
39+
size_t _base_bits = 12, //= 4096 bytes
40+
size_t _factor = 1) :
41+
num_buckets(_num_buckets),
42+
base_bits(_base_bits),
43+
base(1ull << base_bits),
44+
factor(_factor)
45+
{
46+
ceph_assert(factor);
47+
}
48+
49+
/*
50+
* Determines bucket index for a given extent's length in a bucket collection
51+
* with log2(len) indexing.
52+
* The last bucket index is returned for lengths above the maximum.
53+
*/
54+
inline size_t _get_p2_size_bucket(uint64_t len) const {
55+
size_t idx;
56+
const size_t len_p2_max =
57+
base << ((factor * (num_buckets - 2)));
58+
if (len <= base) {
59+
idx = 0;
60+
} else if (len > len_p2_max) {
61+
idx = num_buckets - 1;
62+
} else {
63+
size_t most_bit = cbits(uint64_t(len - 1)) - 1;
64+
idx = 1 + ((most_bit - base_bits) / factor);
65+
}
66+
ceph_assert(idx < num_buckets);
67+
return idx;
68+
}
69+
/*
70+
* Determines bucket index for a given extent's length in a bucket collection
71+
* with linear (len / min_extent_size) indexing.
72+
* The last bucket index is returned for lengths above the maximum.
73+
*/
74+
inline size_t _get_linear_size_bucket(uint64_t len) const {
75+
size_t idx = (len / factor) >> base_bits;
76+
idx = idx < num_buckets ? idx : num_buckets - 1;
77+
return idx;
78+
}
79+
};
80+
81+
/*
 * Lockless, fixed-capacity, opportunistic stack.
 * A put/get operation succeeds only when the caller is the sole
 * active user; conflicting (concurrent) operations are rejected
 * rather than waited for, so entries may be dropped under contention.
 */
class LocklessOpportunisticStack {
  std::atomic<size_t> ref = 0;   // number of operations currently in flight
  std::atomic<size_t> count = 0; // number of stored entries
  std::vector<uint64_t> data;    // fixed-size backing storage
public:
  // Allocates backing storage for 'size' entries; call once before use.
  void init(size_t size) {
    data.resize(size);
  }
  // Pushes v if no other operation is in flight and there is room.
  // Returns false (v is simply dropped) otherwise.
  // Note: takes v by value - the original pass-by-non-const-reference
  // suggested mutation that never happened and rejected rvalues.
  bool try_put(uint64_t v) {
    bool done = ++ref == 1 && count < data.size();
    if (done) {
      data[count++] = v;
    }
    --ref;
    return done;
  }
  // Pops the top entry into v if no other operation is in flight and
  // the stack is non-empty. Returns false otherwise.
  bool try_get(uint64_t& v) {
    bool done = ++ref == 1 && count > 0;
    if (done) {
      v = data[--count];
    }
    --ref;
    return done;
  }
  // Invokes notify for every stored entry.
  // Not synchronized by itself - the caller must exclude concurrent
  // put/get for the traversal to be consistent.
  void foreach(const std::function<void(uint64_t)>& notify) {
    for (size_t i = 0; i < count; i++) {
      notify(data[i]);
    }
  }
};
117+
/*
118+
* Concurrently accessed extent (offset,length) cache
119+
* which permits put/get operation exclusively if no waiting is needed.
120+
* Implemented via a set of independent buckets (aka LocklessOpportunisticStack).
121+
* Each bucket keeps extents of specific size only: 4K, 8K, 12K...64K
122+
* which allows to avoid individual extent size tracking.
123+
* Each bucket permits a single operation at a given time only,
124+
* additional operations against the bucket are rejected meaning relevant
125+
* extents aren't not cached.
126+
*/
127+
class OpportunisticExtentCache {
128+
const Allocator::ExtentCollectionTraits myTraits;
129+
enum {
130+
BUCKET_COUNT = 16,
131+
EXTENTS_PER_BUCKET = 16, // amount of entries per single bucket,
132+
// total amount of entries will be
133+
// BUCKET_COUNT * EXTENTS_PER_BUCKET.
134+
};
135+
136+
std::vector<LocklessOpportunisticStack> buckets;
137+
std::atomic<size_t> hits = 0;
138+
ceph::shared_mutex lock{
139+
ceph::make_shared_mutex(std::string(), false, false, false)
140+
};
141+
public:
142+
OpportunisticExtentCache() :
143+
myTraits(BUCKET_COUNT + 1), // 16 regular buckets + 1 "catch-all" pseudo
144+
// one to be used for out-of-bound checking
145+
// since _get_*_size_bucket() methods imply
146+
// the last bucket usage for the entries
147+
// exceeding the max length.
148+
buckets(BUCKET_COUNT)
149+
{
150+
//buckets.resize(BUCKET_COUNT);
151+
for(auto& b : buckets) {
152+
b.init(EXTENTS_PER_BUCKET);
153+
}
154+
}
155+
bool try_put(uint64_t offset, uint64_t len) {
156+
if (!lock.try_lock_shared()) {
157+
return false;
158+
}
159+
bool ret = false;
160+
ceph_assert(p2aligned(offset, myTraits.base));
161+
ceph_assert(p2aligned(len, myTraits.base));
162+
auto idx = myTraits._get_linear_size_bucket(len);
163+
if (idx < buckets.size())
164+
ret = buckets[idx].try_put(offset);
165+
lock.unlock_shared();
166+
return ret;
167+
}
168+
bool try_get(uint64_t* offset, uint64_t len) {
169+
if (!lock.try_lock_shared()) {
170+
return false;
171+
}
172+
bool ret = false;
173+
ceph_assert(offset);
174+
ceph_assert(p2aligned(len, myTraits.base));
175+
size_t idx = len >> myTraits.base_bits;
176+
if (idx < buckets.size()) {
177+
ret = buckets[idx].try_get(*offset);
178+
if (ret) {
179+
++hits;
180+
}
181+
}
182+
lock.unlock_shared();
183+
return ret;
184+
}
185+
size_t get_hit_count() const {
186+
return hits.load();
187+
}
188+
void foreach(std::function<void(uint64_t offset, uint64_t length)> notify) {
189+
std::unique_lock _lock(lock);
190+
for (uint64_t i = 0; i < buckets.size(); i++) {
191+
auto cb = [&](uint64_t o) {
192+
notify(o, i << myTraits.base_bits);
193+
};
194+
buckets[i].foreach(cb);
195+
}
196+
}
197+
};
24198

25199
public:
26200
Allocator(std::string_view name,

src/os/bluestore/Btree2Allocator.cc

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,15 @@ Btree2Allocator::Btree2Allocator(CephContext* _cct,
2424
bool with_cache,
2525
std::string_view name) :
2626
Allocator(name, device_size, block_size),
27+
myTraits(RANGE_SIZE_BUCKET_COUNT),
2728
cct(_cct),
2829
range_count_cap(max_mem / sizeof(range_seg_t))
2930
{
3031
set_weight_factor(_rweight_factor);
3132
if (with_cache) {
32-
cache = new ChunkCache();
33+
cache = new OpportunisticExtentCache();
3334
}
35+
range_size_set.resize(myTraits.num_buckets);
3436
}
3537

3638
void Btree2Allocator::init_add_free(uint64_t offset, uint64_t length)
@@ -195,7 +197,7 @@ int64_t Btree2Allocator::_allocate(
195197
continue;
196198
}
197199
}
198-
size_t bucket0 = MyTraits::_get_size_bucket(want_now);
200+
size_t bucket0 = myTraits._get_p2_size_bucket(want_now);
199201
int64_t r = __allocate(bucket0, want_now,
200202
unit, extents);
201203
if (r < 0) {
@@ -329,7 +331,7 @@ int64_t Btree2Allocator::__allocate(
329331
auto rs_p = _pick_block(0, rs_tree, size);
330332

331333
if (rs_p == rs_tree->end()) {
332-
auto bucket_center = MyTraits::_get_size_bucket(weight_center);
334+
auto bucket_center = myTraits._get_p2_size_bucket(weight_center);
333335

334336
// requested size is to the left of weight center
335337
// hence we try to search up toward it first
@@ -356,7 +358,7 @@ int64_t Btree2Allocator::__allocate(
356358
bucket = dir < 0 ? bucket0 : bucket_center + 1;
357359
do {
358360
// try spilled over or different direction if bucket index is out of bounds
359-
if (bucket >= MyTraits::num_size_buckets) {
361+
if (bucket >= myTraits.num_buckets) {
360362
if (dir < 0) {
361363
// reached the bottom while going downhill,
362364
// time to try spilled over extents
@@ -376,7 +378,7 @@ int64_t Btree2Allocator::__allocate(
376378
dir = -dir;
377379
bucket = dir < 0 ? bucket0 : bucket_center + 1; // See above on new bucket
378380
// selection rationales
379-
ceph_assert(bucket < MyTraits::num_size_buckets); // this should never happen
381+
ceph_assert(bucket < myTraits.num_buckets); // this should never happen
380382
if (dir == dir0 ) {
381383
// stop if both directions already attempted
382384
return -ENOSPC;
@@ -465,7 +467,7 @@ void Btree2Allocator::_remove_from_tree(
465467
uint64_t end)
466468
{
467469
range_seg_t rs(rt_p->first, rt_p->second);
468-
size_t bucket = MyTraits::_get_size_bucket(rs.length());
470+
size_t bucket = myTraits._get_p2_size_bucket(rs.length());
469471
range_size_tree_t* rs_tree = &range_size_set[bucket];
470472
auto rs_p = rs_tree->find(rs);
471473
ceph_assert(rs_p != rs_tree->end());
@@ -569,7 +571,7 @@ bool Btree2Allocator::__try_insert_range(
569571
void Btree2Allocator::_range_size_tree_add(const range_seg_t& rs) {
570572
auto l = rs.length();
571573
ceph_assert(rs.end > rs.start);
572-
size_t bucket = MyTraits::_get_size_bucket(l);
574+
size_t bucket = myTraits._get_p2_size_bucket(l);
573575
range_size_set[bucket].insert(rs);
574576
num_free += l;
575577

@@ -582,7 +584,7 @@ void Btree2Allocator::_range_size_tree_add(const range_seg_t& rs) {
582584
}
583585
void Btree2Allocator::_range_size_tree_rm(const range_seg_t& rs)
584586
{
585-
size_t bucket = MyTraits::_get_size_bucket(rs.length());
587+
size_t bucket = myTraits._get_p2_size_bucket(rs.length());
586588
range_size_tree_t* rs_tree = &range_size_set[bucket];
587589
ceph_assert(rs_tree->size() > 0);
588590
auto rs_p = rs_tree->find(rs);

0 commit comments

Comments
 (0)