Skip to content

Commit 4f1ef85

Browse files
committed
osd/scrub: modify ScrubStore contents retrieval
A separate commit added a simple test to verify the new store implementation (creating both shallow & deep errors), scrubbing (step 1), deep scrubbing (step 2), then shallow scrubbing again (step 3). The test verifies that the results after step 2 include all shallow errors data (*), and that the results after step 3 include all deep errors data. The test highlighted the need to correctly partition and retrieve the "shards inconsistencies" and the "selected shard" data, which was not fully implemented in the previous commit. Thus, this commit adds the following: - add_object_error() no longer filters out data saved during deep scrubbing; it also filters less of the shallow scrubs "shards inconsistencies" data; - merge_encoded_error_wrappers() now merges the "shards inconsistencies" data correctly, handling the multiple scenarios possible. (*) note the special case of not being able to read the object's version during deep scrubbing (due to a read error). In this case - the data collected during the shallow scrub will not be reported. Signed-off-by: Ronen Friedman <[email protected]>
1 parent 47ef574 commit 4f1ef85

File tree

2 files changed

+106
-50
lines changed

2 files changed

+106
-50
lines changed

src/osd/scrubber/ScrubStore.cc

Lines changed: 104 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -142,49 +142,30 @@ void Store::add_error(int64_t pool, const inconsistent_obj_wrapper& e)
142142
add_object_error(pool, e);
143143
}
144144

145-
namespace {
146-
147-
inconsistent_obj_wrapper create_filtered_copy(
148-
const inconsistent_obj_wrapper& obj,
149-
uint64_t obj_err_mask,
150-
uint64_t shard_err_mask)
151-
{
152-
inconsistent_obj_wrapper dup = obj;
153-
dup.errors &= obj_err_mask;
154-
for (auto& [shard, si] : dup.shards) {
155-
si.errors &= shard_err_mask;
156-
}
157-
return dup;
158-
}
159-
160-
} // namespace
161-
162145

163146
void Store::add_object_error(int64_t pool, const inconsistent_obj_wrapper& e)
164147
{
148+
using librados::obj_err_t;
165149
const auto key = to_object_key(pool, e.object);
166150
dout(20) << fmt::format(
167-
"adding error for object {} ({}). Errors: {} ({}/{}) wr:{}",
168-
e.object, key, librados::err_t{e.errors},
169-
librados::err_t{e.errors & librados::err_t::SHALLOW_ERRORS},
170-
librados::err_t{e.errors & librados::err_t::DEEP_ERRORS}, e)
151+
"{}: adding error for object {} ({}). Errors: {} ({}/{}) "
152+
"unfiltered:{}",
153+
(current_level == scrub_level_t::deep ? "deep" : "shallow"),
154+
e.object, key, obj_err_t{e.errors},
155+
obj_err_t{e.errors & obj_err_t::SHALLOW_ERRORS},
156+
obj_err_t{e.errors & obj_err_t::DEEP_ERRORS}, e)
171157
<< dendl;
172158

173-
// divide the errors & shard errors into shallow and deep.
174-
{
175-
bufferlist bl;
176-
create_filtered_copy(
177-
e, librados::obj_err_t::SHALLOW_ERRORS, librados::err_t::SHALLOW_ERRORS)
178-
.encode(bl);
179-
shallow_db->results[key] = bl;
180-
}
181-
{
182-
bufferlist bl;
183-
create_filtered_copy(
184-
e, librados::obj_err_t::DEEP_ERRORS, librados::err_t::DEEP_ERRORS)
185-
.encode(bl);
186-
deep_db->results[key] = bl;
159+
if (current_level == scrub_level_t::deep) {
160+
// not overriding the deep errors DB during shallow scrubs
161+
deep_db->results[key] = e.encode();
187162
}
163+
164+
// only shallow errors are stored in the shallow DB
165+
auto e_copy = e;
166+
e_copy.errors &= librados::obj_err_t::SHALLOW_ERRORS;
167+
e_copy.union_shards.errors &= librados::err_t::SHALLOW_ERRORS;
168+
shallow_db->results[key] = e_copy.encode();
188169
}
189170

190171

@@ -251,6 +232,8 @@ void Store::reinit(
251232
(level == scrub_level_t::deep ? "deep" : "shallow"))
252233
<< dendl;
253234

235+
current_level = level;
236+
254237
// always clear the known shallow errors DB (as both shallow and deep scrubs
255238
// would recreate it)
256239
if (shallow_db) {
@@ -344,6 +327,15 @@ void Store::collect_specific_store(
344327
}
345328

346329

330+
/*
331+
* Implementation notes:
332+
* - see https://github.com/ceph/ceph/commit/df3ff6dafeadb3822b35c424a890db9a14d7f60f
333+
* for why we encode the shard_info_t in the store.
334+
* - to maintain known shard_info-s created during a deep scrub (but only when
335+
* needed), we use our knowledge of the level of the last scrub performed
336+
* (current_level), and the object user version as encoded in the error
337+
* structure.
338+
*/
347339
bufferlist Store::merge_encoded_error_wrappers(
348340
hobject_t obj,
349341
ExpCacherPosData& latest_sh,
@@ -352,26 +344,88 @@ bufferlist Store::merge_encoded_error_wrappers(
352344
// decode both error wrappers
353345
auto sh_wrap = decode_wrapper(obj, latest_sh->data.cbegin());
354346
auto dp_wrap = decode_wrapper(obj, latest_dp->data.cbegin());
355-
dout(20) << fmt::format(
356-
"merging errors {}. Shallow: {}-({}), Deep: {}-({})",
357-
sh_wrap.object, sh_wrap.errors, dp_wrap.errors, sh_wrap,
358-
dp_wrap)
359-
<< dendl;
360347

361-
// merge the object errors (a simple OR of the two error bit-sets)
362-
sh_wrap.errors |= dp_wrap.errors;
363-
364-
// merge the two shard error maps
365-
for (const auto& [shard, si] : dp_wrap.shards) {
348+
// note: the '20' level is just until we're sure the merging works as
349+
// expected
350+
if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
351+
dout(20) << fmt::format(
352+
"merging errors {}. Deep: {:#x}-({})", sh_wrap.object,
353+
dp_wrap.errors, dp_wrap)
354+
<< dendl;
366355
dout(20) << fmt::format(
367-
"shard {} dp-errors: {} sh-errors:{}", shard, si.errors,
368-
sh_wrap.shards[shard].errors)
356+
"merging errors {}. Shallow: {:#x}-({})", sh_wrap.object,
357+
sh_wrap.errors, sh_wrap)
369358
<< dendl;
370-
// note: we may be creating the shallow shard entry here. This is OK
371-
sh_wrap.shards[shard].errors |= si.errors;
359+
// dev: list the attributes:
360+
for (const auto& [shard, si] : sh_wrap.shards) {
361+
for (const auto& [attr, bl] : si.attrs) {
362+
dout(20) << fmt::format(" shallow: shard {} attr: {}", shard, attr)
363+
<< dendl;
364+
}
365+
}
366+
for (const auto& [shard, si] : dp_wrap.shards) {
367+
for (const auto& [attr, bl] : si.attrs) {
368+
dout(20) << fmt::format(" deep: shard {} attr: {}", shard, attr)
369+
<< dendl;
370+
}
371+
}
372+
}
373+
374+
// Actual merging of the shard map entries is only performed if the
375+
// latest version is from the shallow scrub.
376+
// Otherwise, the deep scrub, which (for the shards info) contains all data,
377+
// and the shallow scrub is ignored.
378+
if (current_level == scrub_level_t::shallow) {
379+
// is the object data related to the same object version?
380+
if (sh_wrap.version == dp_wrap.version) {
381+
// combine the error information
382+
dp_wrap.errors |= sh_wrap.errors;
383+
for (const auto& [shard, si] : sh_wrap.shards) {
384+
if (dp_wrap.shards.contains(shard)) {
385+
dout(20) << fmt::format(
386+
"-----> {}-{} combining: sh-errors: {} dp-errors:{}",
387+
sh_wrap.object, shard, si, dp_wrap.shards[shard])
388+
<< dendl;
389+
const auto saved_er = dp_wrap.shards[shard].errors;
390+
dp_wrap.shards[shard].selected_oi = si.selected_oi;
391+
dp_wrap.shards[shard].primary = si.primary;
392+
dp_wrap.shards[shard].errors |= saved_er;
393+
394+
// the attributes:
395+
for (const auto& [attr, bl] : si.attrs) {
396+
if (!dp_wrap.shards[shard].attrs.contains(attr)) {
397+
dout(20) << fmt::format(
398+
"-----> {}-{} copying shallow attr: attr: {}",
399+
sh_wrap.object, shard, attr)
400+
<< dendl;
401+
dp_wrap.shards[shard].attrs[attr] = bl;
402+
}
403+
// otherwise - we'll ignore the shallow attr buffer
404+
}
405+
} else {
406+
// the deep scrub data for this shard is missing. We take the shallow
407+
// scrub data.
408+
dp_wrap.shards[shard] = si;
409+
}
410+
}
411+
} else if (sh_wrap.version > dp_wrap.version) {
412+
if (false && dp_wrap.version == 0) {
413+
// there was a read error in the deep scrub. The deep version
414+
// shows as '0'. That's severe enough for us to ignore the shallow.
415+
dout(10) << fmt::format("{} ignoring deep after read failure",
416+
sh_wrap.object)
417+
<< dendl;
418+
} else {
419+
// There is a new shallow version of the object results.
420+
// The deep data is for an older version of that object.
421+
// There are multiple possibilities here, but for now we ignore the
422+
// deep data.
423+
dp_wrap = sh_wrap;
424+
}
425+
}
372426
}
373427

374-
return sh_wrap.encode();
428+
return dp_wrap.encode();
375429
}
376430

377431

src/osd/scrubber/ScrubStore.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ class Store {
130130
/// the collection (i.e. - the PG store) in which the errors are stored
131131
const coll_t coll;
132132

133+
scrub_level_t current_level;
134+
133135
/**
134136
* the machinery (backend details, cache, etc.) for storing both levels
135137
* of errors (note: 'optional' to allow delayed creation w/o dynamic

0 commit comments

Comments
 (0)