@@ -142,49 +142,30 @@ void Store::add_error(int64_t pool, const inconsistent_obj_wrapper& e)
142142 add_object_error (pool, e);
143143}
144144
145- namespace {
146-
147- inconsistent_obj_wrapper create_filtered_copy (
148- const inconsistent_obj_wrapper& obj,
149- uint64_t obj_err_mask,
150- uint64_t shard_err_mask)
151- {
152- inconsistent_obj_wrapper dup = obj;
153- dup.errors &= obj_err_mask;
154- for (auto & [shard, si] : dup.shards ) {
155- si.errors &= shard_err_mask;
156- }
157- return dup;
158- }
159-
160- } // namespace
161-
162145
163146void Store::add_object_error (int64_t pool, const inconsistent_obj_wrapper& e)
164147{
148+ using librados::obj_err_t ;
165149 const auto key = to_object_key (pool, e.object );
166150 dout (20 ) << fmt::format (
167- " adding error for object {} ({}). Errors: {} ({}/{}) wr:{}" ,
168- e.object , key, librados::err_t {e.errors },
169- librados::err_t {e.errors & librados::err_t ::SHALLOW_ERRORS},
170- librados::err_t {e.errors & librados::err_t ::DEEP_ERRORS}, e)
151+ " {}: adding error for object {} ({}). Errors: {} ({}/{}) "
152+ " unfiltered:{}" ,
153+ (current_level == scrub_level_t ::deep ? " deep" : " shallow" ),
154+ e.object , key, obj_err_t {e.errors },
155+ obj_err_t {e.errors & obj_err_t ::SHALLOW_ERRORS},
156+ obj_err_t {e.errors & obj_err_t ::DEEP_ERRORS}, e)
171157 << dendl;
172158
173- // divide the errors & shard errors into shallow and deep.
174- {
175- bufferlist bl;
176- create_filtered_copy (
177- e, librados::obj_err_t ::SHALLOW_ERRORS, librados::err_t ::SHALLOW_ERRORS)
178- .encode (bl);
179- shallow_db->results [key] = bl;
180- }
181- {
182- bufferlist bl;
183- create_filtered_copy (
184- e, librados::obj_err_t ::DEEP_ERRORS, librados::err_t ::DEEP_ERRORS)
185- .encode (bl);
186- deep_db->results [key] = bl;
159+ if (current_level == scrub_level_t ::deep) {
160+ // not overriding the deep errors DB during shallow scrubs
161+ deep_db->results [key] = e.encode ();
187162 }
163+
164+ // only shallow errors are stored in the shallow DB
165+ auto e_copy = e;
166+ e_copy.errors &= librados::obj_err_t ::SHALLOW_ERRORS;
167+ e_copy.union_shards .errors &= librados::err_t ::SHALLOW_ERRORS;
168+ shallow_db->results [key] = e_copy.encode ();
188169}
189170
190171
@@ -251,6 +232,8 @@ void Store::reinit(
251232 (level == scrub_level_t ::deep ? " deep" : " shallow" ))
252233 << dendl;
253234
235+ current_level = level;
236+
254237 // always clear the known shallow errors DB (as both shallow and deep scrubs
255238 // would recreate it)
256239 if (shallow_db) {
@@ -344,6 +327,15 @@ void Store::collect_specific_store(
344327}
345328
346329
330+ /*
331+ * Implementation notes:
332+ * - see https://github.com/ceph/ceph/commit/df3ff6dafeadb3822b35c424a890db9a14d7f60f
333+ * for why we encode the shard_info_t in the store.
334+ * - to maintain known shard_info-s created during a deep scrub (but only when
335+ * needed), we use our knowledge of the level of the last scrub performed
336+ * (current_level), and the object user version as encoded in the error
337+ * structure.
338+ */
347339bufferlist Store::merge_encoded_error_wrappers (
348340 hobject_t obj,
349341 ExpCacherPosData& latest_sh,
@@ -352,26 +344,88 @@ bufferlist Store::merge_encoded_error_wrappers(
352344 // decode both error wrappers
353345 auto sh_wrap = decode_wrapper (obj, latest_sh->data .cbegin ());
354346 auto dp_wrap = decode_wrapper (obj, latest_dp->data .cbegin ());
355- dout (20 ) << fmt::format (
356- " merging errors {}. Shallow: {}-({}), Deep: {}-({})" ,
357- sh_wrap.object , sh_wrap.errors , dp_wrap.errors , sh_wrap,
358- dp_wrap)
359- << dendl;
360347
361- // merge the object errors (a simple OR of the two error bit-sets)
362- sh_wrap.errors |= dp_wrap.errors ;
363-
364- // merge the two shard error maps
365- for (const auto & [shard, si] : dp_wrap.shards ) {
348+ // note: the '20' level is just until we're sure the merging works as
349+ // expected
350+ if (g_conf ()->subsys .should_gather <ceph_subsys_osd, 20 >()) {
351+ dout (20 ) << fmt::format (
352+ " merging errors {}. Deep: {:#x}-({})" , sh_wrap.object ,
353+ dp_wrap.errors , dp_wrap)
354+ << dendl;
366355 dout (20 ) << fmt::format (
367- " shard {} dp-errors : {} sh-errors:{} " , shard, si. errors ,
368- sh_wrap.shards [shard]. errors )
356+ " merging errors {}. Shallow : {:#x}-({}) " , sh_wrap. object ,
357+ sh_wrap.errors , sh_wrap )
369358 << dendl;
370- // note: we may be creating the shallow shard entry here. This is OK
371- sh_wrap.shards [shard].errors |= si.errors ;
359+ // dev: list the attributes:
360+ for (const auto & [shard, si] : sh_wrap.shards ) {
361+ for (const auto & [attr, bl] : si.attrs ) {
362+ dout (20 ) << fmt::format (" shallow: shard {} attr: {}" , shard, attr)
363+ << dendl;
364+ }
365+ }
366+ for (const auto & [shard, si] : dp_wrap.shards ) {
367+ for (const auto & [attr, bl] : si.attrs ) {
368+ dout (20 ) << fmt::format (" deep: shard {} attr: {}" , shard, attr)
369+ << dendl;
370+ }
371+ }
372+ }
373+
374+ // Actual merging of the shard map entries is only performed if the
375+ // latest version is from the shallow scrub.
376+ // Otherwise, the deep scrub, which (for the shards info) contains all data,
377+ // and the shallow scrub is ignored.
378+ if (current_level == scrub_level_t ::shallow) {
379+ // is the object data related to the same object version?
380+ if (sh_wrap.version == dp_wrap.version ) {
381+ // combine the error information
382+ dp_wrap.errors |= sh_wrap.errors ;
383+ for (const auto & [shard, si] : sh_wrap.shards ) {
384+ if (dp_wrap.shards .contains (shard)) {
385+ dout (20 ) << fmt::format (
386+ " -----> {}-{} combining: sh-errors: {} dp-errors:{}" ,
387+ sh_wrap.object , shard, si, dp_wrap.shards [shard])
388+ << dendl;
389+ const auto saved_er = dp_wrap.shards [shard].errors ;
390+ dp_wrap.shards [shard].selected_oi = si.selected_oi ;
391+ dp_wrap.shards [shard].primary = si.primary ;
392+ dp_wrap.shards [shard].errors |= saved_er;
393+
394+ // the attributes:
395+ for (const auto & [attr, bl] : si.attrs ) {
396+ if (!dp_wrap.shards [shard].attrs .contains (attr)) {
397+ dout (20 ) << fmt::format (
398+ " -----> {}-{} copying shallow attr: attr: {}" ,
399+ sh_wrap.object , shard, attr)
400+ << dendl;
401+ dp_wrap.shards [shard].attrs [attr] = bl;
402+ }
403+ // otherwise - we'll ignore the shallow attr buffer
404+ }
405+ } else {
406+ // the deep scrub data for this shard is missing. We take the shallow
407+ // scrub data.
408+ dp_wrap.shards [shard] = si;
409+ }
410+ }
411+ } else if (sh_wrap.version > dp_wrap.version ) {
412+ if (false && dp_wrap.version == 0 ) {
413+ // there was a read error in the deep scrub. The deep version
414+ // shows as '0'. That's severe enough for us to ignore the shallow.
415+ dout (10 ) << fmt::format (" {} ignoring deep after read failure" ,
416+ sh_wrap.object )
417+ << dendl;
418+ } else {
419+ // There is a new shallow version of the object results.
420+ // The deep data is for an older version of that object.
421+ // There are multiple possibilities here, but for now we ignore the
422+ // deep data.
423+ dp_wrap = sh_wrap;
424+ }
425+ }
372426 }
373427
374- return sh_wrap .encode ();
428+ return dp_wrap .encode ();
375429}
376430
377431
0 commit comments