Skip to content

Commit e50c80d

Browse files
authored
Merge pull request ceph#55223 from athanatos/sjust/wip-64055
crimson: clear obc_registry on interval change Reviewed-by: Matan Breizman <[email protected]> Reviewed-by: Samuel Just <[email protected]>
2 parents 18149a7 + b6436f4 commit e50c80d

File tree

7 files changed

+191
-49
lines changed

7 files changed

+191
-49
lines changed

src/common/intrusive_lru.h

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@ namespace ceph::common {
1212
/**
1313
* intrusive_lru: lru implementation with embedded map and list hook
1414
*
15-
* Elements will be stored in an intrusive set. Once an element is no longer
16-
* referenced it will remain in the set. The unreferenced elements will be
17-
* evicted from the set once the set size exceeds the `lru_target_size`.
18-
* Referenced elements will not be evicted as this is a registery with
19-
* extra caching capabilities.
15+
* Elements with live references are guarranteed to remain accessible.
16+
* Elements without live references may remain accessible -- implementation
17+
* will release unreferenced elements based on lru_target_size.
2018
*
21-
* Note, this implementation currently is entirely thread-unsafe.
19+
* Accesses, mutations, and references must be confined to a single thread or
20+
* serialized via some other mechanism.
2221
*/
2322

2423
template <typename K, typename V, typename VToK>
@@ -43,19 +42,47 @@ void intrusive_ptr_release(intrusive_lru_base<Config> *p);
4342

4443
template <typename Config>
4544
class intrusive_lru_base {
45+
/* object invariants
46+
*
47+
* intrusive_lru objects may be in one of three states:
48+
* 1. referenced
49+
* - accessible via intrusive_lru
50+
* - intrusive_lru_base::lru is points to parent intrusive_lru
51+
* - present in intrusive_lru::lru_set
52+
* - absent from intrusive_lru::unreferenced_list
53+
* - use_count > 0
54+
* - not eligible for eviction
55+
* - intrusive_lru_release may be invoked externally
56+
* 2. unreferenced
57+
* - accessible via intrusive_lru
58+
* - intrusive_lru_base::lru is null
59+
* - present in intrusive_lru::lru_set
60+
* - present in intrusive_lru::unreferenced_list
61+
* - use_count == 0
62+
* - eligible for eviction
63+
* - intrusive_lru_release cannot be invoked
64+
* 3. invalidated
65+
* - inaccessible via intrusive_lru
66+
* - intrusive_lru_base::lru is null
67+
* - absent from intrusive_lru::lru_set
68+
* - absent from intrusive_lru::unreferenced_list
69+
* - use_count > 0
70+
* - intrusive_lru_release may be invoked externally
71+
*/
4672
unsigned use_count = 0;
4773

48-
// lru points to the corresponding intrusive_lru
49-
// which will be set to null if its use_count
50-
// is zero (aka unreferenced).
74+
// See above, points at intrusive_lru iff referenced
5175
intrusive_lru<Config> *lru = nullptr;
5276

5377
public:
5478
bool is_referenced() const {
5579
return static_cast<bool>(lru);
5680
}
5781
bool is_unreferenced() const {
58-
return !is_referenced();
82+
return !is_referenced() && use_count == 0;
83+
}
84+
bool is_invalidated() const {
85+
return !is_referenced() && use_count > 0;
5986
}
6087
boost::intrusive::set_member_hook<> set_hook;
6188
boost::intrusive::list_member_hook<> list_hook;
@@ -108,9 +135,9 @@ class intrusive_lru {
108135

109136
// when the lru_set exceeds its target size, evict
110137
// only unreferenced elements from it (if any).
111-
void evict() {
138+
void evict(unsigned target_size) {
112139
while (!unreferenced_list.empty() &&
113-
lru_set.size() > lru_target_size) {
140+
lru_set.size() > target_size) {
114141
auto &evict_target = unreferenced_list.front();
115142
assert(evict_target.is_unreferenced());
116143
unreferenced_list.pop_front();
@@ -136,7 +163,7 @@ class intrusive_lru {
136163
assert(b.is_unreferenced());
137164
lru_set.insert(b);
138165
b.lru = this;
139-
evict();
166+
evict(lru_target_size);
140167
}
141168

142169
// an element in the lru_set has no users,
@@ -145,7 +172,7 @@ class intrusive_lru {
145172
assert(b.is_referenced());
146173
unreferenced_list.push_back(b);
147174
b.lru = nullptr;
148-
evict();
175+
evict(lru_target_size);
149176
}
150177

151178
public:
@@ -189,6 +216,21 @@ class intrusive_lru {
189216
}
190217
}
191218

219+
/// drop all elements from lru, invoke f on any with outstanding references
220+
template <typename F>
221+
void clear(F &&f) {
222+
evict(0);
223+
assert(unreferenced_list.empty());
224+
for (auto &i: lru_set) {
225+
std::invoke(f, static_cast<T&>(i));
226+
i.lru = nullptr;
227+
assert(i.is_invalidated());
228+
}
229+
lru_set.clear_and_dispose([](auto *i){
230+
assert(i->use_count > 0); /* don't delete, still has a ref count */
231+
});
232+
}
233+
192234
template <class F>
193235
void for_each(F&& f) {
194236
for (auto& v : lru_set) {
@@ -212,7 +254,7 @@ class intrusive_lru {
212254

213255
void set_target_size(size_t target_size) {
214256
lru_target_size = target_size;
215-
evict();
257+
evict(lru_target_size);
216258
}
217259

218260
~intrusive_lru() {
@@ -226,17 +268,24 @@ class intrusive_lru {
226268
template <typename Config>
227269
void intrusive_ptr_add_ref(intrusive_lru_base<Config> *p) {
228270
assert(p);
229-
assert(p->lru);
230271
p->use_count++;
272+
assert(p->is_referenced() || p->is_invalidated());
231273
}
232274

233275
template <typename Config>
234276
void intrusive_ptr_release(intrusive_lru_base<Config> *p) {
277+
/* See object invariants above -- intrusive_ptr_release can only be invoked on
278+
* is_referenced() or is_invalidated() objects with live external references */
235279
assert(p);
236280
assert(p->use_count > 0);
281+
assert(p->is_referenced() || p->is_invalidated());
237282
--p->use_count;
238283
if (p->use_count == 0) {
239-
p->lru->mark_as_unreferenced(*p);
284+
if (p->lru) {
285+
p->lru->mark_as_unreferenced(*p);
286+
} else {
287+
delete p;
288+
}
240289
}
241290
}
242291

src/crimson/osd/object_context.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,13 @@ class ObjectContext : public ceph::common::intrusive_lru_base<
9494
ceph_assert(is_head());
9595
obs = std::move(_obs);
9696
ssc = std::move(_ssc);
97+
fully_loaded = true;
9798
}
9899

99100
void set_clone_state(ObjectState &&_obs) {
100101
ceph_assert(!is_head());
101102
obs = std::move(_obs);
103+
fully_loaded = true;
102104
}
103105

104106
/// pass the provided exception to any waiting consumers of this ObjectContext
@@ -110,6 +112,10 @@ class ObjectContext : public ceph::common::intrusive_lru_base<
110112
}
111113
}
112114

115+
bool is_loaded_and_valid() const {
116+
return fully_loaded && !invalidated_by_interval_change;
117+
}
118+
113119
private:
114120
tri_mutex lock;
115121
bool recovery_read_marker = false;
@@ -124,9 +130,13 @@ class ObjectContext : public ceph::common::intrusive_lru_base<
124130
});
125131
}
126132

127-
boost::intrusive::list_member_hook<> list_hook;
133+
boost::intrusive::list_member_hook<> obc_accessing_hook;
128134
uint64_t list_link_cnt = 0;
135+
bool fully_loaded = false;
136+
bool invalidated_by_interval_change = false;
129137

138+
friend class ObjectContextRegistry;
139+
friend class ObjectContextLoader;
130140
public:
131141

132142
template <typename ListType>
@@ -147,7 +157,7 @@ class ObjectContext : public ceph::common::intrusive_lru_base<
147157
using obc_accessing_option_t = boost::intrusive::member_hook<
148158
ObjectContext,
149159
boost::intrusive::list_member_hook<>,
150-
&ObjectContext::list_hook>;
160+
&ObjectContext::obc_accessing_hook>;
151161

152162
template<RWState::State Type, typename InterruptCond = void, typename Func>
153163
auto with_lock(Func&& func) {
@@ -260,6 +270,12 @@ class ObjectContextRegistry : public md_config_obs_t {
260270
obc_lru.clear_range(from, to);
261271
}
262272

273+
void invalidate_on_interval_change() {
274+
obc_lru.clear([](auto &obc) {
275+
obc.invalidated_by_interval_change = true;
276+
});
277+
}
278+
263279
template <class F>
264280
void for_each(F&& f) {
265281
obc_lru.for_each(std::forward<F>(f));

src/crimson/osd/object_context_loader.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,15 @@ using crimson::common::local_conf;
168168
auto loaded =
169169
load_obc_iertr::make_ready_future<ObjectContextRef>(obc);
170170
if (existed) {
171+
if (!obc->is_loaded_and_valid()) {
172+
ERRORDPP(
173+
"obc for {} invalid -- fully_loaded={}, "
174+
"invalidated_by_interval_change={}",
175+
dpp, obc->get_oid(),
176+
obc->fully_loaded, obc->invalidated_by_interval_change
177+
);
178+
}
179+
ceph_assert(obc->is_loaded_and_valid());
171180
DEBUGDPP("cache hit on {}", dpp, obc->get_oid());
172181
} else {
173182
DEBUGDPP("cache miss on {}", dpp, obc->get_oid());

src/crimson/osd/ops_executer.cc

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -931,12 +931,12 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
931931
return std::vector<snapid_t>{std::begin(snapc.snaps), last};
932932
}();
933933

934-
auto [snap_oi, clone_obc] = prepare_clone(coid);
934+
auto clone_obc = prepare_clone(coid);
935935
// make clone
936-
backend.clone(snap_oi, initial_obs, clone_obc->obs, txn);
936+
backend.clone(clone_obc->obs.oi, initial_obs, clone_obc->obs, txn);
937937

938938
delta_stats.num_objects++;
939-
if (snap_oi.is_omap()) {
939+
if (clone_obc->obs.oi.is_omap()) {
940940
delta_stats.num_objects_omap++;
941941
}
942942
delta_stats.num_object_clones++;
@@ -960,7 +960,7 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
960960
cloning_ctx->log_entry = {
961961
pg_log_entry_t::CLONE,
962962
coid,
963-
snap_oi.version,
963+
clone_obc->obs.oi.version,
964964
initial_obs.oi.version,
965965
initial_obs.oi.user_version,
966966
osd_reqid_t(),
@@ -1028,32 +1028,23 @@ OpsExecuter::flush_clone_metadata(
10281028
});
10291029
}
10301030

1031-
// TODO: make this static
1032-
std::pair<object_info_t, ObjectContextRef> OpsExecuter::prepare_clone(
1031+
ObjectContextRef OpsExecuter::prepare_clone(
10331032
const hobject_t& coid)
10341033
{
1035-
object_info_t static_snap_oi(coid);
1036-
static_snap_oi.version = osd_op_params->at_version;
1037-
static_snap_oi.prior_version = obc->obs.oi.version;
1038-
static_snap_oi.copy_user_bits(obc->obs.oi);
1039-
if (static_snap_oi.is_whiteout()) {
1040-
// clone shouldn't be marked as whiteout
1041-
static_snap_oi.clear_flag(object_info_t::FLAG_WHITEOUT);
1042-
}
1043-
1044-
ObjectContextRef clone_obc;
1045-
if (pg->is_primary()) {
1046-
// lookup_or_create
1047-
auto [c_obc, existed] =
1048-
pg->obc_registry.get_cached_obc(std::move(coid));
1049-
assert(!existed);
1050-
c_obc->obs.oi = static_snap_oi;
1051-
c_obc->obs.exists = true;
1052-
c_obc->ssc = obc->ssc;
1053-
logger().debug("clone_obc: {}", c_obc->obs.oi);
1054-
clone_obc = std::move(c_obc);
1055-
}
1056-
return std::make_pair(std::move(static_snap_oi), std::move(clone_obc));
1034+
ceph_assert(pg->is_primary());
1035+
ObjectState clone_obs{coid};
1036+
clone_obs.exists = true;
1037+
clone_obs.oi.version = osd_op_params->at_version;
1038+
clone_obs.oi.prior_version = obc->obs.oi.version;
1039+
clone_obs.oi.copy_user_bits(obc->obs.oi);
1040+
clone_obs.oi.clear_flag(object_info_t::FLAG_WHITEOUT);
1041+
1042+
auto [clone_obc, existed] = pg->obc_registry.get_cached_obc(std::move(coid));
1043+
ceph_assert(!existed);
1044+
1045+
clone_obc->set_clone_state(std::move(clone_obs));
1046+
clone_obc->ssc = obc->ssc;
1047+
return clone_obc;
10571048
}
10581049

10591050
void OpsExecuter::apply_stats()

src/crimson/osd/ops_executer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this<OpsExecuter> {
449449

450450
version_t get_last_user_version() const;
451451

452-
std::pair<object_info_t, ObjectContextRef> prepare_clone(
452+
ObjectContextRef prepare_clone(
453453
const hobject_t& coid);
454454

455455
void apply_stats();

src/crimson/osd/pg.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,7 @@ void PG::on_change(ceph::os::Transaction &t) {
15291529
client_request_orderer.clear_and_cancel(*this);
15301530
}
15311531
scrubber.on_interval_change();
1532+
obc_registry.invalidate_on_interval_change();
15321533
}
15331534

15341535
void PG::context_registry_on_change() {

0 commit comments

Comments
 (0)