Skip to content

Commit 0242368

Browse files
authored
Merge pull request ceph#44941 from ronen-fr/wip-rf-scrubBEv1_2
osd/scrub: limiting scrubber-backend external interfaces Reviewed-by: Samuel Just <[email protected]>
2 parents b2e5170 + fe3ec8b commit 0242368

File tree

7 files changed

+433
-268
lines changed

7 files changed

+433
-268
lines changed

src/osd/PG.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -306,12 +306,12 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
306306
stats.objects_scrubbed = 0;
307307
}
308308

309-
void reset_objects_scrubbed() {
310-
recovery_state.update_stats(
311-
[=](auto &history, auto &stats) {
312-
reset_objects_scrubbed(stats);
313-
return true;
314-
});
309+
void reset_objects_scrubbed()
310+
{
311+
recovery_state.update_stats([=](auto& history, auto& stats) {
312+
reset_objects_scrubbed(stats);
313+
return true;
314+
});
315315
}
316316

317317
bool is_deleting() const {

src/osd/scrubber/ScrubStore.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,23 @@ Store::~Store()
124124
ceph_assert(results.empty());
125125
}
126126

127+
void Store::add_error(int64_t pool, const inconsistent_obj_wrapper& e)
128+
{
129+
add_object_error(pool, e);
130+
}
131+
127132
void Store::add_object_error(int64_t pool, const inconsistent_obj_wrapper& e)
128133
{
129134
bufferlist bl;
130135
e.encode(bl);
131136
results[to_object_key(pool, e.object)] = bl;
132137
}
133138

139+
void Store::add_error(int64_t pool, const inconsistent_snapset_wrapper& e)
140+
{
141+
add_snap_error(pool, e);
142+
}
143+
134144
void Store::add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e)
135145
{
136146
bufferlist bl;

src/osd/scrubber/ScrubStore.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ class Store {
2525
const coll_t& coll);
2626
void add_object_error(int64_t pool, const inconsistent_obj_wrapper& e);
2727
void add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e);
28+
29+
// and a variant-friendly interface:
30+
void add_error(int64_t pool, const inconsistent_obj_wrapper& e);
31+
void add_error(int64_t pool, const inconsistent_snapset_wrapper& e);
32+
2833
bool empty() const;
2934
void flush(ObjectStore::Transaction *);
3035
void cleanup(ObjectStore::Transaction *);

src/osd/scrubber/pg_scrubber.cc

Lines changed: 140 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
using std::list;
2727
using std::pair;
28-
using std::set;
2928
using std::stringstream;
3029
using std::vector;
3130
using namespace Scrub;
@@ -1039,13 +1038,16 @@ int PgScrubber::build_replica_map_chunk()
10391038
case 0: {
10401039
// finished!
10411040

1042-
m_be->replica_clean_meta(replica_scrubmap, m_end.is_max(), m_start);
1041+
auto required_fixes = m_be->replica_clean_meta(
1042+
replica_scrubmap, m_end.is_max(), m_start, *this);
1043+
// actuate snap-mapper changes:
1044+
apply_snap_mapper_fixes(required_fixes);
10431045

10441046
// the local map has been created. Send it to the primary.
10451047
// Note: once the message reaches the Primary, it may ask us for another
1046-
// chunk - and we better be done with the current scrub. Thus - the preparation of
1047-
// the reply message is separate, and we clear the scrub state before actually
1048-
// sending it.
1048+
// chunk - and we better be done with the current scrub. Thus - the
1049+
// preparation of the reply message is separate, and we clear the scrub
1050+
// state before actually sending it.
10491051

10501052
auto reply = prep_replica_map_msg(PreemptionNoted::no_preemption);
10511053
replica_handling_done();
@@ -1129,10 +1131,107 @@ void PgScrubber::run_callbacks()
11291131
}
11301132
}
11311133

1134+
void PgScrubber::persist_scrub_results(inconsistent_objs_t&& all_errors)
1135+
{
1136+
dout(10) << __func__ << " " << all_errors.size() << " errors" << dendl;
1137+
1138+
for (auto& e : all_errors) {
1139+
std::visit([this](auto& e) { m_store->add_error(m_pg->pool.id, e); }, e);
1140+
}
1141+
1142+
ObjectStore::Transaction t;
1143+
m_store->flush(&t);
1144+
m_osds->store->queue_transaction(m_pg->ch, std::move(t), nullptr);
1145+
}
1146+
1147+
void PgScrubber::apply_snap_mapper_fixes(
1148+
const std::vector<snap_mapper_fix_t>& fix_list)
1149+
{
1150+
dout(15) << __func__ << " " << fix_list.size() << " fixes" << dendl;
1151+
1152+
if (fix_list.empty()) {
1153+
return;
1154+
}
1155+
1156+
ObjectStore::Transaction t;
1157+
OSDriver::OSTransaction t_drv(m_pg->osdriver.get_transaction(&t));
1158+
1159+
for (auto& [fix_op, hoid, snaps, bogus_snaps] : fix_list) {
1160+
1161+
if (fix_op == snap_mapper_op_t::update) {
1162+
1163+
// must remove the existing snap-set before inserting the correct one
1164+
if (auto r = m_pg->snap_mapper.remove_oid(hoid, &t_drv); r < 0) {
1165+
1166+
derr << __func__ << ": remove_oid returned " << cpp_strerror(r)
1167+
<< dendl;
1168+
ceph_abort();
1169+
}
1170+
1171+
m_osds->clog->error() << fmt::format(
1172+
"osd.{} found snap mapper error on pg {} oid {} snaps in mapper: {}, "
1173+
"oi: "
1174+
"{} ...repaired",
1175+
m_pg_whoami, m_pg_id, hoid, bogus_snaps, snaps);
1176+
1177+
} else {
1178+
1179+
m_osds->clog->error() << fmt::format(
1180+
"osd.{} found snap mapper error on pg {} oid {} snaps missing in "
1181+
"mapper, should be: {} ...repaired",
1182+
m_pg_whoami, m_pg_id, hoid, snaps);
1183+
}
1184+
1185+
// now - insert the correct snap-set
1186+
1187+
m_pg->snap_mapper.add_oid(hoid, snaps, &t_drv);
1188+
}
1189+
1190+
// wait for repair to apply to avoid confusing other bits of the system.
1191+
{
1192+
dout(15) << __func__ << " wait on repair!" << dendl;
1193+
1194+
ceph::condition_variable my_cond;
1195+
ceph::mutex my_lock = ceph::make_mutex("PG::_scan_snaps my_lock");
1196+
int e = 0;
1197+
bool done{false};
1198+
1199+
t.register_on_applied_sync(new C_SafeCond(my_lock, my_cond, &done, &e));
1200+
1201+
if (e = m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t));
1202+
e != 0) {
1203+
derr << __func__ << ": queue_transaction got " << cpp_strerror(e)
1204+
<< dendl;
1205+
} else {
1206+
std::unique_lock l{my_lock};
1207+
my_cond.wait(l, [&done] { return done; });
1208+
ceph_assert(m_pg->osd->store); // RRR why?
1209+
}
1210+
dout(15) << __func__ << " wait on repair - done" << dendl;
1211+
}
1212+
}
1213+
11321214
void PgScrubber::maps_compare_n_cleanup()
11331215
{
11341216
m_pg->add_objects_scrubbed_count(m_be->get_primary_scrubmap().objects.size());
1135-
m_be->scrub_compare_maps(m_end.is_max());
1217+
1218+
auto required_fixes = m_be->scrub_compare_maps(m_end.is_max(), *this);
1219+
if (!required_fixes.inconsistent_objs.empty()) {
1220+
if (state_test(PG_STATE_REPAIR)) {
1221+
dout(10) << __func__ << ": discarding scrub results (repairing)" << dendl;
1222+
} else {
1223+
// perform the ordered scrub-store I/O:
1224+
persist_scrub_results(std::move(required_fixes.inconsistent_objs));
1225+
}
1226+
}
1227+
1228+
// actuate snap-mapper changes:
1229+
apply_snap_mapper_fixes(required_fixes.snap_fix_list);
1230+
1231+
auto chunk_err_counts = m_be->get_error_counts();
1232+
m_shallow_errors += chunk_err_counts.shallow_errors;
1233+
m_deep_errors += chunk_err_counts.deep_errors;
1234+
11361235
m_start = m_end;
11371236
run_callbacks();
11381237
requeue_waiting();
@@ -1527,20 +1626,23 @@ void PgScrubber::scrub_finish()
15271626
// if the repair request comes from auto-repair and large number of errors,
15281627
// we would like to cancel auto-repair
15291628
if (m_is_repair && m_flags.auto_repair &&
1530-
m_authoritative.size() > m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
1629+
m_be->authoritative_peers_count() >
1630+
static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
15311631

15321632
dout(10) << __func__ << " undoing the repair" << dendl;
1533-
state_clear(PG_STATE_REPAIR); // not expected to be set, anyway
1633+
state_clear(PG_STATE_REPAIR); // not expected to be set, anyway
15341634
m_is_repair = false;
15351635
update_op_mode_text();
15361636
}
15371637

15381638
m_be->update_repair_status(m_is_repair);
15391639

1540-
// if a regular scrub had errors within the limit, do a deep scrub to auto repair
1640+
// if a regular scrub had errors within the limit, do a deep scrub to auto
1641+
// repair
15411642
bool do_auto_scrub = false;
1542-
if (m_flags.deep_scrub_on_error && !m_authoritative.empty() &&
1543-
m_authoritative.size() <= m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) {
1643+
if (m_flags.deep_scrub_on_error && m_be->authoritative_peers_count() &&
1644+
m_be->authoritative_peers_count() <=
1645+
static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
15441646
ceph_assert(!m_is_deep);
15451647
do_auto_scrub = true;
15461648
dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl;
@@ -1551,9 +1653,34 @@ void PgScrubber::scrub_finish()
15511653
// type-specific finish (can tally more errors)
15521654
_scrub_finish();
15531655

1656+
/// \todo fix the relevant scrub test so that we would not need the extra log
1657+
/// line here (even if the following 'if' is false)
1658+
1659+
if (m_be->authoritative_peers_count()) {
1660+
1661+
auto err_msg = fmt::format("{} {} {} missing, {} inconsistent objects",
1662+
m_pg->info.pgid,
1663+
m_mode_desc,
1664+
m_be->m_missing.size(),
1665+
m_be->m_inconsistent.size());
1666+
1667+
dout(2) << err_msg << dendl;
1668+
m_osds->clog->error() << fmt::to_string(err_msg);
1669+
}
1670+
15541671
// note that the PG_STATE_REPAIR might have changed above
1555-
m_fixed_count += m_be->scrub_process_inconsistent();
1556-
bool has_error = !m_authoritative.empty() && m_is_repair;
1672+
if (m_be->authoritative_peers_count() && m_is_repair) {
1673+
1674+
state_clear(PG_STATE_CLEAN);
1675+
// we know we have a problem, so it's OK to set the user-visible flag
1676+
// even if we only reached here via auto-repair
1677+
state_set(PG_STATE_REPAIR);
1678+
update_op_mode_text();
1679+
m_be->update_repair_status(true);
1680+
m_fixed_count += m_be->scrub_process_inconsistent();
1681+
}
1682+
1683+
bool has_error = (m_be->authoritative_peers_count() > 0) && m_is_repair;
15571684

15581685
{
15591686
stringstream oss;
@@ -2004,7 +2131,6 @@ void PgScrubber::reset_internal_state()
20042131

20052132
run_callbacks();
20062133

2007-
m_authoritative.clear();
20082134
num_digest_updates_pending = 0;
20092135
m_primary_scrubmap_pos.reset();
20102136
replica_scrubmap = ScrubMap{};

src/osd/scrubber/pg_scrubber.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,9 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf);
255255
* am forced to strongly decouple the state-machine implementation details from
256256
* the actual scrubbing code.
257257
*/
258-
class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
259-
258+
class PgScrubber : public ScrubPgIF,
259+
public ScrubMachineListener,
260+
public SnapMapperAccessor {
260261
public:
261262
explicit PgScrubber(PG* pg);
262263

@@ -509,6 +510,12 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
509510
utime_t scrub_begin_stamp;
510511
std::ostream& gen_prefix(std::ostream& out) const final;
511512

513+
// fetching the snap-set for a given object (used by the scrub-backend)
514+
int get_snaps(const hobject_t& hoid, std::set<snapid_t>* snaps_set) const final
515+
{
516+
return m_pg->snap_mapper.get_snaps(hoid, snaps_set);
517+
}
518+
512519
protected:
513520
bool state_test(uint64_t m) const { return m_pg->state_test(m); }
514521
void state_set(uint64_t m) { m_pg->state_set(m); }
@@ -815,9 +822,8 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
815822

816823
Scrub::MapsCollectionStatus m_maps_status;
817824

818-
819-
/// Maps from object with errors to good peers
820-
std::map<hobject_t, std::list<std::pair<ScrubMap::object, pg_shard_t>>> m_authoritative;
825+
void persist_scrub_results(inconsistent_objs_t&& all_errors);
826+
void apply_snap_mapper_fixes(const std::vector<snap_mapper_fix_t>& fix_list);
821827

822828
// ------------ members used if we are a replica
823829

0 commit comments

Comments
 (0)