Skip to content

Commit 06e22e7

Browse files
kamoltat authored and
shraddhaag committed
src/mon/PGMap.cc: init pool_availability
Added the `PoolAvailability` struct. Modified PGMap.cc to include a key/value map, `pool_availability`, whose key is the `poolid` and whose value is a `PoolAvailability`. Introduced the function `PGMap::get_unavailable_pg_in_pool_map()` to identify and aggregate all the PGs we mark as `unavailable`, together with the pool associated with each unavailable PG. Also added `pool_availability` to `PGMapDigest::dump()`. Fixes: https://tracker.ceph.com/issues/67777 Signed-off-by: Kamoltat <[email protected]>
1 parent dc0ebca commit 06e22e7

File tree

7 files changed

+259
-2
lines changed

7 files changed

+259
-2
lines changed

src/mon/MgrStatMonitor.cc

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,79 @@ void MgrStatMonitor::create_initial()
6666
encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL);
6767
}
6868

69+
void MgrStatMonitor::calc_pool_availability()
70+
{
71+
dout(20) << __func__ << dendl;
72+
auto pool_avail_end = pool_availability.end();
73+
for (const auto& i : digest.pool_pg_unavailable_map) {
74+
const auto& poolid = i.first;
75+
if (pool_availability.find(poolid) == pool_avail_end){
76+
// New Pool so we add.
77+
pool_availability.insert({poolid, PoolAvailability()});
78+
dout(20) << __func__ << "Adding pool: " << poolid << dendl;
79+
}
80+
}
81+
utime_t now(ceph_clock_now());
82+
auto pool_unavail_end = digest.pool_pg_unavailable_map.end();
83+
for (const auto& i : pool_availability) {
84+
const auto& poolid = i.first;
85+
if (digest.pool_pg_unavailable_map.find(poolid) ==
86+
pool_unavail_end) {
87+
// delete none exist pool
88+
pool_availability.erase(poolid);
89+
dout(20) << __func__ << "Deleting pool: " << poolid << dendl;
90+
continue;
91+
}
92+
if (mon.osdmon()->osdmap.have_pg_pool(poolid)){
93+
// Currently, couldn't find an elegant way to get pool name
94+
pool_availability[poolid].pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
95+
} else {
96+
pool_availability.erase(poolid);
97+
dout(20) << __func__ << "pool: "
98+
<< poolid << " no longer exists in osdmap! Deleting pool: "
99+
<< poolid << dendl;
100+
continue;
101+
}
102+
if (pool_availability[poolid].is_avail) {
103+
if (!digest.pool_pg_unavailable_map[poolid].empty()) {
104+
// avail to unavail
105+
dout(20) << __func__
106+
<< ": Pool " << poolid << " status: Available to Unavailable" << dendl;
107+
pool_availability[poolid].is_avail = false;
108+
pool_availability[poolid].num_failures += 1;
109+
pool_availability[poolid].last_downtime = now;
110+
pool_availability[poolid].uptime +=
111+
now - pool_availability[poolid].last_uptime;
112+
} else {
113+
// avail to avail
114+
dout(20) << __func__
115+
<< ": Pool " << poolid << " status: Available to Available" << dendl;
116+
pool_availability[poolid].uptime +=
117+
now - pool_availability[poolid].last_uptime;
118+
pool_availability[poolid].last_uptime = now;
119+
}
120+
} else {
121+
if (!digest.pool_pg_unavailable_map[poolid].empty()) {
122+
// unavail to unavail
123+
dout(20) << __func__
124+
<< ": Pool " << poolid << " status: Unavailable to Unavailable" << dendl;
125+
pool_availability[poolid].downtime +=
126+
now - pool_availability[poolid].last_downtime;
127+
pool_availability[poolid].last_downtime = now;
128+
} else {
129+
// unavail to avail
130+
dout(20) << __func__
131+
<< ": Pool " << poolid << " status: Unavailable to Available" << dendl;
132+
pool_availability[poolid].is_avail = true;
133+
pool_availability[poolid].last_uptime = now;
134+
pool_availability[poolid].uptime +=
135+
now - pool_availability[poolid].last_downtime;
136+
}
137+
}
138+
}
139+
pending_pool_availability.swap(pool_availability);
140+
}
141+
69142
void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
70143
{
71144
version = get_last_committed();
@@ -82,9 +155,13 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
82155
if (!p.end()) {
83156
decode(progress_events, p);
84157
}
158+
if (!p.end()) {
159+
decode(pool_availability, p);
160+
}
85161
dout(10) << __func__ << " v" << version
86162
<< " service_map e" << service_map.epoch
87163
<< " " << progress_events.size() << " progress events"
164+
<< " " << pool_availability.size() << " pools availability tracked"
88165
<< dendl;
89166
}
90167
catch (ceph::buffer::error& e) {
@@ -95,6 +172,7 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
95172
check_subs();
96173
update_logger();
97174
mon.osdmon()->notify_new_pg_digest();
175+
calc_pool_availability();
98176
}
99177

100178
void MgrStatMonitor::update_logger()
@@ -156,6 +234,7 @@ void MgrStatMonitor::encode_pending(MonitorDBStore::TransactionRef t)
156234
ceph_assert(pending_service_map_bl.length());
157235
bl.append(pending_service_map_bl);
158236
encode(pending_progress_events, bl);
237+
encode(pending_pool_availability, bl);
159238
put_version(t, version, bl);
160239
put_last_committed(t, version);
161240

@@ -260,6 +339,15 @@ bool MgrStatMonitor::prepare_report(MonOpRequestRef op)
260339
jf.close_section();
261340
jf.flush(*_dout);
262341
*_dout << dendl;
342+
dout(20) << "pool_availability:\n";
343+
JSONFormatter jf(true);
344+
jf.open_object_section("pool_availability");
345+
for (auto& i : pending_pool_availability) {
346+
jf.dump_object(std::to_string(i.first), i.second);
347+
}
348+
jf.close_section();
349+
jf.flush(*_dout);
350+
*_dout << dendl;
263351
return true;
264352
}
265353

src/mon/MgrStatMonitor.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@ class MgrStatMonitor : public PaxosService {
1414
PGMapDigest digest;
1515
ServiceMap service_map;
1616
std::map<std::string,ProgressEvent> progress_events;
17+
std::map<uint64_t, PoolAvailability> pool_availability;
1718

1819
// pending commit
1920
PGMapDigest pending_digest;
2021
health_check_map_t pending_health_checks;
2122
std::map<std::string,ProgressEvent> pending_progress_events;
2223
ceph::buffer::list pending_service_map_bl;
24+
std::map<uint64_t, PoolAvailability> pending_pool_availability;
2325

2426
public:
2527
MgrStatMonitor(Monitor &mn, Paxos &p, const std::string& service_name);
@@ -49,6 +51,8 @@ class MgrStatMonitor : public PaxosService {
4951
bool preprocess_getpoolstats(MonOpRequestRef op);
5052
bool preprocess_statfs(MonOpRequestRef op);
5153

54+
void calc_pool_availability();
55+
5256
void check_sub(Subscription *sub);
5357
void check_subs();
5458
void send_digests();
@@ -83,6 +87,10 @@ class MgrStatMonitor : public PaxosService {
8387
return digest;
8488
}
8589

90+
// Read-only access to the pool_availability map (keyed by uint64_t pool id).
// Returns a reference to the member, not a copy.
const std::map<uint64_t, PoolAvailability>& get_pool_availability() {
91+
return pool_availability;
92+
}
93+
8694
ceph_statfs get_statfs(OSDMap& osdmap,
8795
std::optional<int64_t> data_pool) const {
8896
return digest.get_statfs(osdmap, data_pool);

src/mon/OSDMonitor.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ using ceph::ErasureCodeProfile;
111111
using ceph::Formatter;
112112
using ceph::JSONFormatter;
113113
using ceph::make_message;
114+
using ceph::make_timespan;
115+
using ceph::timespan_str;
114116
using namespace std::literals;
115117

116118
#define dout_subsys ceph_subsys_mon
@@ -14407,6 +14409,33 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
1440714409
wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs,
1440814410
get_last_committed() + 1));
1440914411
return true;
14412+
} else if (prefix == "osd pool availability-status") {
14413+
TextTable tbl;
14414+
tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
14415+
tbl.define_column("UPTIME", TextTable::LEFT, TextTable::RIGHT);
14416+
tbl.define_column("DOWNTIME", TextTable::LEFT, TextTable::RIGHT);
14417+
tbl.define_column("NUMFAILURES", TextTable::LEFT, TextTable::RIGHT);
14418+
tbl.define_column("MTBF", TextTable::LEFT, TextTable::RIGHT);
14419+
tbl.define_column("MTTR", TextTable::LEFT, TextTable::RIGHT);
14420+
tbl.define_column("SCORE", TextTable::LEFT, TextTable::RIGHT);
14421+
tbl.define_column("AVAILABLE", TextTable::LEFT, TextTable::RIGHT);
14422+
std::map<uint64_t, PoolAvailability> pool_availability = mon.mgrstatmon()->get_pool_availability();
14423+
for (const auto& i : pool_availability) {
14424+
const auto& p = i.second;
14425+
double mtbf = p.num_failures > 0 ? (p.uptime / p.num_failures) : 0;
14426+
double mttr = p.num_failures > 0 ? (p.downtime / p.num_failures) : 0;
14427+
double score = mtbf > 0 ? mtbf / (mtbf + mttr): 1.0;
14428+
tbl << p.pool_name;
14429+
tbl << timespan_str(make_timespan(p.uptime));
14430+
tbl << timespan_str(make_timespan(p.downtime));
14431+
tbl << p.num_failures;
14432+
tbl << timespan_str(make_timespan(mtbf));
14433+
tbl << timespan_str(make_timespan(mttr));
14434+
tbl << score;
14435+
tbl << p.is_avail;
14436+
tbl << TextTable::endrow;
14437+
}
14438+
rdata.append(stringify(tbl));
1441014439
} else if (prefix == "osd force-create-pg") {
1441114440
pg_t pgid;
1441214441
string pgidstr;

src/mon/PGMap.cc

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap::Incremental, pgmap_inc, pgmap);
5656
void PGMapDigest::encode(bufferlist& bl, uint64_t features) const
5757
{
5858
// NOTE: see PGMap::encode_digest
59-
uint8_t v = 4;
59+
uint8_t v = 5;
6060
assert(HAVE_FEATURE(features, SERVER_NAUTILUS));
6161
ENCODE_START(v, 1, bl);
6262
encode(num_pg, bl);
@@ -77,12 +77,13 @@ void PGMapDigest::encode(bufferlist& bl, uint64_t features) const
7777
encode(avail_space_by_rule, bl);
7878
encode(purged_snaps, bl);
7979
encode(osd_sum_by_class, bl, features);
80+
encode(pool_pg_unavailable_map, bl);
8081
ENCODE_FINISH(bl);
8182
}
8283

8384
void PGMapDigest::decode(bufferlist::const_iterator& p)
8485
{
85-
DECODE_START(4, p);
86+
DECODE_START(5, p);
8687
assert(struct_v >= 4);
8788
decode(num_pg, p);
8889
decode(num_pg_active, p);
@@ -102,6 +103,9 @@ void PGMapDigest::decode(bufferlist::const_iterator& p)
102103
decode(avail_space_by_rule, p);
103104
decode(purged_snaps, p);
104105
decode(osd_sum_by_class, p);
106+
if (struct_v >= 5) {
107+
decode(pool_pg_unavailable_map, p);
108+
}
105109
DECODE_FINISH(p);
106110
}
107111

@@ -151,6 +155,18 @@ void PGMapDigest::dump(ceph::Formatter *f) const
151155
f->close_section();
152156
}
153157
f->close_section();
158+
f->open_array_section("pool_pg_unavailable_map");
159+
for (auto& p : pool_pg_unavailable_map) {
160+
f->open_object_section("pool_pg_unavailable_map");
161+
f->dump_string("poolid", std::to_string(p.first));
162+
f->open_array_section("pgs");
163+
for (const auto& pg : p.second) {
164+
f->dump_stream("pg") << pg;
165+
}
166+
f->close_section();
167+
f->close_section();
168+
}
169+
f->close_section();
154170
f->open_array_section("num_pg_by_osd");
155171
for (auto& p : num_pg_by_osd) {
156172
f->open_object_section("count");
@@ -1261,6 +1277,46 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
12611277
last_pg_scan = inc.pg_scan;
12621278
}
12631279

1280+
/*
1281+
Returns a map of all pools in a cluster. Each value lists any PGs that
1282+
are in any of the following states:
1283+
- non-active
1284+
- stale
1285+
1286+
Eg: {1=[1.0],2=[],3=[]}
1287+
Here the cluster has 3 pools with id 1,2,3 and pool 1 has an inactive PG 1.0
1288+
*/
1289+
void PGMap::get_unavailable_pg_in_pool_map(const OSDMap& osdmap)
1290+
{
1291+
dout(20) << __func__ << dendl;
1292+
pool_pg_unavailable_map.clear();
1293+
utime_t now(ceph_clock_now());
1294+
utime_t cutoff = now - utime_t(g_conf().get_val<int64_t>("mon_pg_stuck_threshold"), 0);
1295+
for (auto i = pg_stat.begin();
1296+
i != pg_stat.end();
1297+
++i) {
1298+
const auto poolid = i->first.pool();
1299+
pool_pg_unavailable_map[poolid];
1300+
utime_t val = cutoff;
1301+
1302+
if (!(i->second.state & PG_STATE_ACTIVE)) { // This case covers unknown state since unknow state bit == 0;
1303+
if (i->second.last_active < val)
1304+
val = i->second.last_active;
1305+
}
1306+
1307+
if (i->second.state & PG_STATE_STALE) {
1308+
if (i->second.last_unstale < val)
1309+
val = i->second.last_unstale;
1310+
}
1311+
1312+
if (val < cutoff) {
1313+
pool_pg_unavailable_map[poolid].push_back(i->first);
1314+
dout(20) << "pool: " << poolid << " pg: " << i->first
1315+
<< " is stuck unavailable" << " state: " << i->second.state << dendl;
1316+
}
1317+
}
1318+
}
1319+
12641320
void PGMap::calc_stats()
12651321
{
12661322
num_pg = 0;
@@ -1488,6 +1544,7 @@ void PGMap::encode_digest(const OSDMap& osdmap,
14881544
get_rules_avail(osdmap, &avail_space_by_rule);
14891545
calc_osd_sum_by_class(osdmap);
14901546
calc_purged_snaps();
1547+
get_unavailable_pg_in_pool_map(osdmap);
14911548
PGMapDigest::encode(bl, features);
14921549
}
14931550

src/mon/PGMap.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
#include "common/Formatter.h"
2828
#include "osd/osd_types.h"
2929
#include "include/mempool.h"
30+
#include "mon/health_check.h"
31+
#include <sstream>
32+
#include "mon/mon_types.h"
3033

3134
#include <cstdint>
3235
#include <iosfwd>
@@ -57,6 +60,7 @@ class PGMapDigest {
5760
osd_stat_t osd_sum;
5861
mempool::pgmap::map<std::string,osd_stat_t> osd_sum_by_class;
5962
mempool::pgmap::unordered_map<uint64_t,int32_t> num_pg_by_state;
63+
mempool::pgmap::map<uint64_t,std::vector<pg_t>> pool_pg_unavailable_map;
6064
struct pg_count {
6165
int32_t acting = 0;
6266
int32_t up_not_acting = 0;
@@ -440,6 +444,7 @@ class PGMap : public PGMapDigest {
440444

441445
void apply_incremental(CephContext *cct, const Incremental& inc);
442446
void calc_stats();
447+
void get_unavailable_pg_in_pool_map(const OSDMap& osdmap);
443448
void stat_pg_add(const pg_t &pgid, const pg_stat_t &s,
444449
bool sameosds=false);
445450
bool stat_pg_sub(const pg_t &pgid, const pg_stat_t &s,

0 commit comments

Comments
 (0)