|
16 | 16 | #include <ostream> |
17 | 17 | #include "include/ceph_assert.h" |
18 | 18 | #include "bluestore_types.h" |
| 19 | +#include "common/ceph_mutex.h" |
19 | 20 |
|
20 | 21 | typedef interval_set<uint64_t> release_set_t; |
21 | 22 | typedef release_set_t::value_type release_set_entry_t; |
22 | 23 |
|
23 | 24 | class Allocator { |
| 25 | +protected: |
| 26 | + |
| 27 | + struct ExtentCollectionTraits { |
| 28 | + size_t num_buckets; |
| 29 | + size_t base_bits; // min extent size |
| 30 | + size_t base = 1ull << base_bits; |
| 31 | + size_t factor; // single bucket size range to be |
| 32 | + // determined as [len, len * factor * 2) |
| 33 | + // for log2(len) indexing and |
| 34 | + // [len, len + factor * base) |
| 35 | + // for linear indexing. |
| 36 | + |
| 37 | + |
| 38 | + ExtentCollectionTraits(size_t _num_buckets, |
| 39 | + size_t _base_bits = 12, //= 4096 bytes |
| 40 | + size_t _factor = 1) : |
| 41 | + num_buckets(_num_buckets), |
| 42 | + base_bits(_base_bits), |
| 43 | + base(1ull << base_bits), |
| 44 | + factor(_factor) |
| 45 | + { |
| 46 | + ceph_assert(factor); |
| 47 | + } |
| 48 | + |
| 49 | + /* |
| 50 | + * Determines bucket index for a given extent's length in a bucket collection |
| 51 | + * with log2(len) indexing. |
| 52 | + * The last bucket index is returned for lengths above the maximum. |
| 53 | + */ |
| 54 | + inline size_t _get_p2_size_bucket(uint64_t len) const { |
| 55 | + size_t idx; |
| 56 | + const size_t len_p2_max = |
| 57 | + base << ((factor * (num_buckets - 2))); |
| 58 | + if (len <= base) { |
| 59 | + idx = 0; |
| 60 | + } else if (len > len_p2_max) { |
| 61 | + idx = num_buckets - 1; |
| 62 | + } else { |
| 63 | + size_t most_bit = cbits(uint64_t(len - 1)) - 1; |
| 64 | + idx = 1 + ((most_bit - base_bits) / factor); |
| 65 | + } |
| 66 | + ceph_assert(idx < num_buckets); |
| 67 | + return idx; |
| 68 | + } |
| 69 | + /* |
| 70 | + * Determines bucket index for a given extent's length in a bucket collection |
| 71 | + * with linear (len / min_extent_size) indexing. |
| 72 | + * The last bucket index is returned for lengths above the maximum. |
| 73 | + */ |
| 74 | + inline size_t _get_linear_size_bucket(uint64_t len) const { |
| 75 | + size_t idx = (len / factor) >> base_bits; |
| 76 | + idx = idx < num_buckets ? idx : num_buckets - 1; |
| 77 | + return idx; |
| 78 | + } |
| 79 | + }; |
| 80 | + |
| 81 | + /* |
| 82 | + * Lockless stack implementation |
| 83 | + * that permits put/get operation exclusively |
| 84 | + * if no waiting is needed. |
| 85 | + * Conflicting operations are omitted. |
| 86 | + */ |
/*
 * Fixed-capacity LIFO of 64-bit values that uses an atomic entry counter
 * instead of a mutex: try_put()/try_get() proceed only when the caller is
 * the sole thread inside either of them, otherwise they give up at once.
 */
class LocklessOpportunisticStack {
  std::atomic<size_t> ref{0};    // threads currently inside try_put()/try_get()
  std::atomic<size_t> count{0};  // number of valid entries in data
  std::vector<uint64_t> data;    // entry storage, sized by init()
public:
  /*
   * Sets the capacity of the stack.
   * Doesn't take part in the ref-based exclusion, hence it has to be
   * called while no other thread accesses the stack.
   */
  void init(size_t size) {
    data.resize(size);
  }
  /*
   * Pushes v unless another put/get is in flight or the stack is full.
   * The value is taken by value, so const objects and temporaries are
   * accepted as well.
   * Returns true if v has been stored.
   */
  bool try_put(uint64_t v) {
    // only the caller that moves ref from 0 to 1 may touch count/data
    bool done = ++ref == 1 && count < data.size();
    if (done) {
      data[count++] = v;
    }
    --ref;
    return done;
  }
  /*
   * Pops the most recently stored value into v unless another put/get is
   * in flight or the stack is empty.
   * Returns true if v has been assigned; v is left untouched otherwise.
   */
  bool try_get(uint64_t& v) {
    // only the caller that moves ref from 0 to 1 may touch count/data
    bool done = ++ref == 1 && count > 0;
    if (done) {
      v = data[--count];
    }
    --ref;
    return done;
  }
  /*
   * Invokes notify for every stored value, oldest first.
   * Doesn't take part in the ref-based exclusion, hence the caller has to
   * keep concurrent try_put()/try_get() calls away while iterating.
   */
  void foreach(const std::function<void(uint64_t)>& notify) const {
    for (size_t i = 0; i < count; i++) {
      notify(data[i]);
    }
  }
};
| 117 | + /* |
| 118 | + * Concurrently accessed extent (offset,length) cache |
| 119 | + * which permits put/get operation exclusively if no waiting is needed. |
| 120 | + * Implemented via a set of independent buckets (aka LocklessOpportunisticStack). |
| 121 | + * Each bucket keeps extents of specific size only: 4K, 8K, 12K...64K |
| 122 | + * which allows to avoid individual extent size tracking. |
| 123 | + * Each bucket permits a single operation at a given time only, |
| 124 | + * additional operations against the bucket are rejected meaning relevant |
| 125 | + * extents aren't cached. |
| 126 | + */ |
| 127 | + class OpportunisticExtentCache { |
| 128 | + const Allocator::ExtentCollectionTraits myTraits; |
| 129 | + enum { |
| 130 | + BUCKET_COUNT = 16, |
| 131 | + EXTENTS_PER_BUCKET = 16, // amount of entries per single bucket, |
| 132 | + // total amount of entries will be |
| 133 | + // BUCKET_COUNT * EXTENTS_PER_BUCKET. |
| 134 | + }; |
| 135 | + |
| 136 | + std::vector<LocklessOpportunisticStack> buckets; |
| 137 | + std::atomic<size_t> hits = 0; |
| 138 | + ceph::shared_mutex lock{ |
| 139 | + ceph::make_shared_mutex(std::string(), false, false, false) |
| 140 | + }; |
| 141 | + public: |
| 142 | + OpportunisticExtentCache() : |
| 143 | + myTraits(BUCKET_COUNT + 1), // 16 regular buckets + 1 "catch-all" pseudo |
| 144 | + // one to be used for out-of-bound checking |
| 145 | + // since _get_*_size_bucket() methods imply |
| 146 | + // the last bucket usage for the entries |
| 147 | + // exceeding the max length. |
| 148 | + buckets(BUCKET_COUNT) |
| 149 | + { |
| 150 | + //buckets.resize(BUCKET_COUNT); |
| 151 | + for(auto& b : buckets) { |
| 152 | + b.init(EXTENTS_PER_BUCKET); |
| 153 | + } |
| 154 | + } |
| 155 | + bool try_put(uint64_t offset, uint64_t len) { |
| 156 | + if (!lock.try_lock_shared()) { |
| 157 | + return false; |
| 158 | + } |
| 159 | + bool ret = false; |
| 160 | + ceph_assert(p2aligned(offset, myTraits.base)); |
| 161 | + ceph_assert(p2aligned(len, myTraits.base)); |
| 162 | + auto idx = myTraits._get_linear_size_bucket(len); |
| 163 | + if (idx < buckets.size()) |
| 164 | + ret = buckets[idx].try_put(offset); |
| 165 | + lock.unlock_shared(); |
| 166 | + return ret; |
| 167 | + } |
| 168 | + bool try_get(uint64_t* offset, uint64_t len) { |
| 169 | + if (!lock.try_lock_shared()) { |
| 170 | + return false; |
| 171 | + } |
| 172 | + bool ret = false; |
| 173 | + ceph_assert(offset); |
| 174 | + ceph_assert(p2aligned(len, myTraits.base)); |
| 175 | + size_t idx = len >> myTraits.base_bits; |
| 176 | + if (idx < buckets.size()) { |
| 177 | + ret = buckets[idx].try_get(*offset); |
| 178 | + if (ret) { |
| 179 | + ++hits; |
| 180 | + } |
| 181 | + } |
| 182 | + lock.unlock_shared(); |
| 183 | + return ret; |
| 184 | + } |
| 185 | + size_t get_hit_count() const { |
| 186 | + return hits.load(); |
| 187 | + } |
| 188 | + void foreach(std::function<void(uint64_t offset, uint64_t length)> notify) { |
| 189 | + std::unique_lock _lock(lock); |
| 190 | + for (uint64_t i = 0; i < buckets.size(); i++) { |
| 191 | + auto cb = [&](uint64_t o) { |
| 192 | + notify(o, i << myTraits.base_bits); |
| 193 | + }; |
| 194 | + buckets[i].foreach(cb); |
| 195 | + } |
| 196 | + } |
| 197 | + }; |
24 | 198 |
|
25 | 199 | public: |
26 | 200 | Allocator(std::string_view name, |
|
0 commit comments