2525
2626using std::list;
2727using std::pair;
28- using std::set;
2928using std::stringstream;
3029using std::vector;
3130using namespace Scrub ;
@@ -1039,13 +1038,16 @@ int PgScrubber::build_replica_map_chunk()
10391038 case 0 : {
10401039 // finished!
10411040
1042- m_be->replica_clean_meta (replica_scrubmap, m_end.is_max (), m_start);
1041+ auto required_fixes = m_be->replica_clean_meta (
1042+ replica_scrubmap, m_end.is_max (), m_start, *this );
1043+ // actuate snap-mapper changes:
1044+ apply_snap_mapper_fixes (required_fixes);
10431045
10441046 // the local map has been created. Send it to the primary.
10451047 // Note: once the message reaches the Primary, it may ask us for another
1046- // chunk - and we better be done with the current scrub. Thus - the preparation of
1047- // the reply message is separate, and we clear the scrub state before actually
1048- // sending it.
1048+ // chunk - and we better be done with the current scrub. Thus - the
1049+ // preparation of the reply message is separate, and we clear the scrub
1050+ // state before actually sending it.
10491051
10501052 auto reply = prep_replica_map_msg (PreemptionNoted::no_preemption);
10511053 replica_handling_done ();
@@ -1129,10 +1131,107 @@ void PgScrubber::run_callbacks()
11291131 }
11301132}
11311133
1134+ void PgScrubber::persist_scrub_results (inconsistent_objs_t && all_errors)
1135+ {
1136+ dout (10 ) << __func__ << " " << all_errors.size () << " errors" << dendl;
1137+
1138+ for (auto & e : all_errors) {
1139+ std::visit ([this ](auto & e) { m_store->add_error (m_pg->pool .id , e); }, e);
1140+ }
1141+
1142+ ObjectStore::Transaction t;
1143+ m_store->flush (&t);
1144+ m_osds->store ->queue_transaction (m_pg->ch , std::move (t), nullptr );
1145+ }
1146+
1147+ void PgScrubber::apply_snap_mapper_fixes (
1148+ const std::vector<snap_mapper_fix_t >& fix_list)
1149+ {
1150+ dout (15 ) << __func__ << " " << fix_list.size () << " fixes" << dendl;
1151+
1152+ if (fix_list.empty ()) {
1153+ return ;
1154+ }
1155+
1156+ ObjectStore::Transaction t;
1157+ OSDriver::OSTransaction t_drv (m_pg->osdriver .get_transaction (&t));
1158+
1159+ for (auto & [fix_op, hoid, snaps, bogus_snaps] : fix_list) {
1160+
1161+ if (fix_op == snap_mapper_op_t ::update) {
1162+
1163+ // must remove the existing snap-set before inserting the correct one
1164+ if (auto r = m_pg->snap_mapper .remove_oid (hoid, &t_drv); r < 0 ) {
1165+
1166+ derr << __func__ << " : remove_oid returned " << cpp_strerror (r)
1167+ << dendl;
1168+ ceph_abort ();
1169+ }
1170+
1171+ m_osds->clog ->error () << fmt::format (
1172+ " osd.{} found snap mapper error on pg {} oid {} snaps in mapper: {}, "
1173+ " oi: "
1174+ " {} ...repaired" ,
1175+ m_pg_whoami, m_pg_id, hoid, bogus_snaps, snaps);
1176+
1177+ } else {
1178+
1179+ m_osds->clog ->error () << fmt::format (
1180+ " osd.{} found snap mapper error on pg {} oid {} snaps missing in "
1181+ " mapper, should be: {} ...repaired" ,
1182+ m_pg_whoami, m_pg_id, hoid, snaps);
1183+ }
1184+
1185+ // now - insert the correct snap-set
1186+
1187+ m_pg->snap_mapper .add_oid (hoid, snaps, &t_drv);
1188+ }
1189+
1190+ // wait for repair to apply to avoid confusing other bits of the system.
1191+ {
1192+ dout (15 ) << __func__ << " wait on repair!" << dendl;
1193+
1194+ ceph::condition_variable my_cond;
1195+ ceph::mutex my_lock = ceph::make_mutex (" PG::_scan_snaps my_lock" );
1196+ int e = 0 ;
1197+ bool done{false };
1198+
1199+ t.register_on_applied_sync (new C_SafeCond (my_lock, my_cond, &done, &e));
1200+
1201+ if (e = m_pg->osd ->store ->queue_transaction (m_pg->ch , std::move (t));
1202+ e != 0 ) {
1203+ derr << __func__ << " : queue_transaction got " << cpp_strerror (e)
1204+ << dendl;
1205+ } else {
1206+ std::unique_lock l{my_lock};
1207+ my_cond.wait (l, [&done] { return done; });
1208+ ceph_assert (m_pg->osd ->store ); // RRR why?
1209+ }
1210+ dout (15 ) << __func__ << " wait on repair - done" << dendl;
1211+ }
1212+ }
1213+
11321214void PgScrubber::maps_compare_n_cleanup ()
11331215{
11341216 m_pg->add_objects_scrubbed_count (m_be->get_primary_scrubmap ().objects .size ());
1135- m_be->scrub_compare_maps (m_end.is_max ());
1217+
1218+ auto required_fixes = m_be->scrub_compare_maps (m_end.is_max (), *this );
1219+ if (!required_fixes.inconsistent_objs .empty ()) {
1220+ if (state_test (PG_STATE_REPAIR)) {
1221+ dout (10 ) << __func__ << " : discarding scrub results (repairing)" << dendl;
1222+ } else {
1223+ // perform the ordered scrub-store I/O:
1224+ persist_scrub_results (std::move (required_fixes.inconsistent_objs ));
1225+ }
1226+ }
1227+
1228+ // actuate snap-mapper changes:
1229+ apply_snap_mapper_fixes (required_fixes.snap_fix_list );
1230+
1231+ auto chunk_err_counts = m_be->get_error_counts ();
1232+ m_shallow_errors += chunk_err_counts.shallow_errors ;
1233+ m_deep_errors += chunk_err_counts.deep_errors ;
1234+
11361235 m_start = m_end;
11371236 run_callbacks ();
11381237 requeue_waiting ();
@@ -1527,20 +1626,23 @@ void PgScrubber::scrub_finish()
15271626 // if the repair request comes from auto-repair and large number of errors,
15281627 // we would like to cancel auto-repair
15291628 if (m_is_repair && m_flags.auto_repair &&
1530- m_authoritative.size () > m_pg->cct ->_conf ->osd_scrub_auto_repair_num_errors ) {
1629+ m_be->authoritative_peers_count () >
1630+ static_cast <int >(m_pg->cct ->_conf ->osd_scrub_auto_repair_num_errors )) {
15311631
15321632 dout (10 ) << __func__ << " undoing the repair" << dendl;
1533- state_clear (PG_STATE_REPAIR); // not expected to be set, anyway
1633+ state_clear (PG_STATE_REPAIR); // not expected to be set, anyway
15341634 m_is_repair = false ;
15351635 update_op_mode_text ();
15361636 }
15371637
15381638 m_be->update_repair_status (m_is_repair);
15391639
1540- // if a regular scrub had errors within the limit, do a deep scrub to auto repair
1640+ // if a regular scrub had errors within the limit, do a deep scrub to auto
1641+ // repair
15411642 bool do_auto_scrub = false ;
1542- if (m_flags.deep_scrub_on_error && !m_authoritative.empty () &&
1543- m_authoritative.size () <= m_pg->cct ->_conf ->osd_scrub_auto_repair_num_errors ) {
1643+ if (m_flags.deep_scrub_on_error && m_be->authoritative_peers_count () &&
1644+ m_be->authoritative_peers_count () <=
1645+ static_cast <int >(m_pg->cct ->_conf ->osd_scrub_auto_repair_num_errors )) {
15441646 ceph_assert (!m_is_deep);
15451647 do_auto_scrub = true ;
15461648 dout (15 ) << __func__ << " Try to auto repair after scrub errors" << dendl;
@@ -1551,9 +1653,34 @@ void PgScrubber::scrub_finish()
15511653 // type-specific finish (can tally more errors)
15521654 _scrub_finish ();
15531655
1656+ // / \todo fix the relevant scrub test so that we would not need the extra log
1657+ // / line here (even if the following 'if' is false)
1658+
1659+ if (m_be->authoritative_peers_count ()) {
1660+
1661+ auto err_msg = fmt::format (" {} {} {} missing, {} inconsistent objects" ,
1662+ m_pg->info .pgid ,
1663+ m_mode_desc,
1664+ m_be->m_missing .size (),
1665+ m_be->m_inconsistent .size ());
1666+
1667+ dout (2 ) << err_msg << dendl;
1668+ m_osds->clog ->error () << fmt::to_string (err_msg);
1669+ }
1670+
15541671 // note that the PG_STATE_REPAIR might have changed above
1555- m_fixed_count += m_be->scrub_process_inconsistent ();
1556- bool has_error = !m_authoritative.empty () && m_is_repair;
1672+ if (m_be->authoritative_peers_count () && m_is_repair) {
1673+
1674+ state_clear (PG_STATE_CLEAN);
1675+ // we know we have a problem, so it's OK to set the user-visible flag
1676+ // even if we only reached here via auto-repair
1677+ state_set (PG_STATE_REPAIR);
1678+ update_op_mode_text ();
1679+ m_be->update_repair_status (true );
1680+ m_fixed_count += m_be->scrub_process_inconsistent ();
1681+ }
1682+
1683+ bool has_error = (m_be->authoritative_peers_count () > 0 ) && m_is_repair;
15571684
15581685 {
15591686 stringstream oss;
@@ -2004,7 +2131,6 @@ void PgScrubber::reset_internal_state()
20042131
20052132 run_callbacks ();
20062133
2007- m_authoritative.clear ();
20082134 num_digest_updates_pending = 0 ;
20092135 m_primary_scrubmap_pos.reset ();
20102136 replica_scrubmap = ScrubMap{};
0 commit comments