Skip to content

Commit 0712c11

Browse files
authored
Merge pull request ceph#59673 from shraddhaag/availability-score-feature
monitor: add availability score feature
2 parents 62a90d0 + d8c23fc commit 0712c11

File tree

10 files changed

+381
-3
lines changed

10 files changed

+381
-3
lines changed

qa/standalone/mon/availability.sh

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (C) 2024 IBM <[email protected]>
4+
#
5+
# Author: Shraddha Agrawal <[email protected]>
6+
#
7+
# This program is free software; you can redistribute it and/or modify
8+
# it under the terms of the GNU Library Public License as published by
9+
# the Free Software Foundation; either version 2, or (at your option)
10+
# any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU Library Public License for more details.
16+
#
17+
18+
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19+
20+
# Prepare the shared Ceph environment for this script, then execute the
# requested test functions one after another.
#
# $1   - working directory handed to setup, the test function and teardown
# $2.. - optional names of test functions to run; when none are given, every
#        shell function whose name matches TEST_[0-9a-z_]* is run
#
# Returns 1 as soon as setup, a test function, or teardown fails.
function run() {
    local dir=$1
    shift

    # git grep '\<7124\>' : there must be only one
    export CEPH_MON="127.0.0.1:7124"
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "
    export CEPH_ARGS

    # Fall back to every TEST_* function currently defined in the shell.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        if ! setup $dir ; then
            return 1
        fi
        if ! $func $dir ; then
            return 1
        fi
        if ! teardown $dir ; then
            return 1
        fi
    done
}
36+
37+
# Checks the output of `ceph osd pool availability-status` for pool "foo":
#   1. with a healthy cluster the pool must be reported as available;
#   2. after osd.0 is stopped, more objects are written, osd.0 is restarted
#      and osd.1 is stopped, the pool must be reported as unavailable and its
#      score must be lower than the score recorded in step 1.
#
# $1 - working directory for the daemons started by this test
#
# Returns 0 on success, 1 on any failed check.
function TEST_availablity_score() {
    local dir=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1

    # Presumably a large recovery delay keeps the OSDs from recovering the
    # objects written while an OSD is down, so they stay unfound - confirm.
    ceph config set osd osd_recovery_delay_start 10000
    # Quote the pattern so the shell never expands it against local files.
    ceph config get 'osd.*' osd_recovery_delay_start
    ceph osd pool create foo 64
    ceph osd pool set foo size 2 --yes-i-really-mean-it

    WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean
    ceph osd pool stats

    ceph -s
    ceph health | grep HEALTH_OK || return 1
    ceph osd pool availability-status
    AVAILABILITY_STATUS=$(ceph osd pool availability-status | grep -w "foo")
    if [ -z "$AVAILABILITY_STATUS" ]; then
        # Without this guard the numeric tests below would error out on an
        # empty string and the check would be skipped silently.
        echo "Failed: Pool foo not listed in availability status"
        return 1
    fi
    # Column 7 is SCORE and column 8 is AVAILABLE in the status table.
    SCORE=$(echo "$AVAILABILITY_STATUS" | awk '{print $7}')
    IS_AVAILABLE=$(echo "$AVAILABILITY_STATUS" | awk '{print $8}')
    if [ "$IS_AVAILABLE" -ne 1 ]; then
        echo "Failed: Pool is not available in availabilty status"
        return 1
    fi

    # write some objects
    for i in $(seq 1 10); do
        rados --pool foo put object_id$i /etc/group;
    done

    # kill OSD 0
    kill_daemons $dir TERM osd.0 >&2 < /dev/null || return 1
    sleep 10
    ceph -s
    ceph osd pool availability-status

    # write more objects
    for i in $(seq 1 20); do
        rados --pool foo put object_id$i /etc/group;
    done

    # bring osd 0 back up
    activate_osd $dir 0 || return 1
    ceph -s
    ceph osd pool availability-status

    # kill osd 1
    kill_daemons $dir TERM osd.1 >&2 < /dev/null || return 1
    ceph -s
    ceph osd pool availability-status

    # wait for 10 seconds so availability score is refreshed
    # check ceph health and availability score
    sleep 10
    ceph -s
    ceph osd pool availability-status
    AVAILABILITY_STATUS=$(ceph osd pool availability-status | grep -w "foo")
    if [ -z "$AVAILABILITY_STATUS" ]; then
        echo "Failed: Pool foo not listed in availability status"
        return 1
    fi
    IS_AVAILABLE=$(echo "$AVAILABILITY_STATUS" | awk '{print $8}')
    NEW_SCORE=$(echo "$AVAILABILITY_STATUS" | awk '{print $7}')
    if [ "$IS_AVAILABLE" -ne 0 ]; then
        echo "Failed: Pool is available in availabilty status when unfound objects present"
        return 1
    fi
    # bc prints 1 when the comparison holds, i.e. when the score did not drop.
    if (( $(echo "$NEW_SCORE >= $SCORE" | bc -l) )); then
        echo "Failed: Availability score for the pool did not drop"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
110+
111+
# Start the test run, forwarding this script's command-line arguments.
# `main` is not defined in this script; presumably it comes from the
# ceph-helpers.sh file sourced above and ends up calling run() - confirm.
main availability "$@"

src/mon/MgrStatMonitor.cc

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,79 @@ void MgrStatMonitor::create_initial()
6666
encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL);
6767
}
6868

69+
void MgrStatMonitor::calc_pool_availability()
70+
{
71+
dout(20) << __func__ << dendl;
72+
auto pool_avail_end = pool_availability.end();
73+
for (const auto& i : digest.pool_pg_unavailable_map) {
74+
const auto& poolid = i.first;
75+
if (pool_availability.find(poolid) == pool_avail_end){
76+
// New Pool so we add.
77+
pool_availability.insert({poolid, PoolAvailability()});
78+
dout(20) << __func__ << "Adding pool: " << poolid << dendl;
79+
}
80+
}
81+
utime_t now(ceph_clock_now());
82+
auto pool_unavail_end = digest.pool_pg_unavailable_map.end();
83+
for (const auto& i : pool_availability) {
84+
const auto& poolid = i.first;
85+
if (digest.pool_pg_unavailable_map.find(poolid) ==
86+
pool_unavail_end) {
87+
// delete none exist pool
88+
pool_availability.erase(poolid);
89+
dout(20) << __func__ << "Deleting pool: " << poolid << dendl;
90+
continue;
91+
}
92+
if (mon.osdmon()->osdmap.have_pg_pool(poolid)){
93+
// Currently, couldn't find an elegant way to get pool name
94+
pool_availability[poolid].pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
95+
} else {
96+
pool_availability.erase(poolid);
97+
dout(20) << __func__ << "pool: "
98+
<< poolid << " no longer exists in osdmap! Deleting pool: "
99+
<< poolid << dendl;
100+
continue;
101+
}
102+
if (pool_availability[poolid].is_avail) {
103+
if (!digest.pool_pg_unavailable_map[poolid].empty()) {
104+
// avail to unavail
105+
dout(20) << __func__
106+
<< ": Pool " << poolid << " status: Available to Unavailable" << dendl;
107+
pool_availability[poolid].is_avail = false;
108+
pool_availability[poolid].num_failures += 1;
109+
pool_availability[poolid].last_downtime = now;
110+
pool_availability[poolid].uptime +=
111+
now - pool_availability[poolid].last_uptime;
112+
} else {
113+
// avail to avail
114+
dout(20) << __func__
115+
<< ": Pool " << poolid << " status: Available to Available" << dendl;
116+
pool_availability[poolid].uptime +=
117+
now - pool_availability[poolid].last_uptime;
118+
pool_availability[poolid].last_uptime = now;
119+
}
120+
} else {
121+
if (!digest.pool_pg_unavailable_map[poolid].empty()) {
122+
// unavail to unavail
123+
dout(20) << __func__
124+
<< ": Pool " << poolid << " status: Unavailable to Unavailable" << dendl;
125+
pool_availability[poolid].downtime +=
126+
now - pool_availability[poolid].last_downtime;
127+
pool_availability[poolid].last_downtime = now;
128+
} else {
129+
// unavail to avail
130+
dout(20) << __func__
131+
<< ": Pool " << poolid << " status: Unavailable to Available" << dendl;
132+
pool_availability[poolid].is_avail = true;
133+
pool_availability[poolid].last_uptime = now;
134+
pool_availability[poolid].uptime +=
135+
now - pool_availability[poolid].last_downtime;
136+
}
137+
}
138+
}
139+
pending_pool_availability.swap(pool_availability);
140+
}
141+
69142
void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
70143
{
71144
version = get_last_committed();
@@ -82,9 +155,13 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
82155
if (!p.end()) {
83156
decode(progress_events, p);
84157
}
158+
if (!p.end()) {
159+
decode(pool_availability, p);
160+
}
85161
dout(10) << __func__ << " v" << version
86162
<< " service_map e" << service_map.epoch
87163
<< " " << progress_events.size() << " progress events"
164+
<< " " << pool_availability.size() << " pools availability tracked"
88165
<< dendl;
89166
}
90167
catch (ceph::buffer::error& e) {
@@ -95,6 +172,7 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)
95172
check_subs();
96173
update_logger();
97174
mon.osdmon()->notify_new_pg_digest();
175+
calc_pool_availability();
98176
}
99177

100178
void MgrStatMonitor::update_logger()
@@ -156,6 +234,7 @@ void MgrStatMonitor::encode_pending(MonitorDBStore::TransactionRef t)
156234
ceph_assert(pending_service_map_bl.length());
157235
bl.append(pending_service_map_bl);
158236
encode(pending_progress_events, bl);
237+
encode(pending_pool_availability, bl);
159238
put_version(t, version, bl);
160239
put_last_committed(t, version);
161240

@@ -260,6 +339,15 @@ bool MgrStatMonitor::prepare_report(MonOpRequestRef op)
260339
jf.close_section();
261340
jf.flush(*_dout);
262341
*_dout << dendl;
342+
dout(20) << "pool_availability:\n";
343+
JSONFormatter jf(true);
344+
jf.open_object_section("pool_availability");
345+
for (auto& i : pending_pool_availability) {
346+
jf.dump_object(std::to_string(i.first), i.second);
347+
}
348+
jf.close_section();
349+
jf.flush(*_dout);
350+
*_dout << dendl;
263351
return true;
264352
}
265353

src/mon/MgrStatMonitor.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@ class MgrStatMonitor : public PaxosService {
1414
PGMapDigest digest;
1515
ServiceMap service_map;
1616
std::map<std::string,ProgressEvent> progress_events;
17+
std::map<uint64_t, PoolAvailability> pool_availability;
1718

1819
// pending commit
1920
PGMapDigest pending_digest;
2021
health_check_map_t pending_health_checks;
2122
std::map<std::string,ProgressEvent> pending_progress_events;
2223
ceph::buffer::list pending_service_map_bl;
24+
std::map<uint64_t, PoolAvailability> pending_pool_availability;
2325

2426
public:
2527
MgrStatMonitor(Monitor &mn, Paxos &p, const std::string& service_name);
@@ -49,6 +51,8 @@ class MgrStatMonitor : public PaxosService {
4951
bool preprocess_getpoolstats(MonOpRequestRef op);
5052
bool preprocess_statfs(MonOpRequestRef op);
5153

54+
void calc_pool_availability();
55+
5256
void check_sub(Subscription *sub);
5357
void check_subs();
5458
void send_digests();
@@ -83,6 +87,10 @@ class MgrStatMonitor : public PaxosService {
8387
return digest;
8488
}
8589

90+
const std::map<uint64_t, PoolAvailability>& get_pool_availability() {
91+
return pool_availability;
92+
}
93+
8694
ceph_statfs get_statfs(OSDMap& osdmap,
8795
std::optional<int64_t> data_pool) const {
8896
return digest.get_statfs(osdmap, data_pool);

src/mon/MonCommands.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,9 @@ COMMAND("osd pool stretch unset "
12431243
"name=min_size,type=CephInt,range=0 ",
12441244
"unset the stretch mode for the pool",
12451245
"osd", "rw")
1246+
// "osd pool availability-status": takes no arguments; registered under the
// "osd" module with "r" permission.
COMMAND("osd pool availability-status",
	"obtain availability stats from all pools",
	"osd", "r")
12461249
COMMAND("osd utilization",
12471250
"get basic pg distribution stats",
12481251
"osd", "r")

src/mon/OSDMonitor.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ using ceph::ErasureCodeProfile;
111111
using ceph::Formatter;
112112
using ceph::JSONFormatter;
113113
using ceph::make_message;
114+
using ceph::make_timespan;
115+
using ceph::timespan_str;
114116
using namespace std::literals;
115117

116118
#define dout_subsys ceph_subsys_mon
@@ -14407,6 +14409,33 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
1440714409
wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs,
1440814410
get_last_committed() + 1));
1440914411
return true;
14412+
} else if (prefix == "osd pool availability-status") {
14413+
TextTable tbl;
14414+
tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
14415+
tbl.define_column("UPTIME", TextTable::LEFT, TextTable::RIGHT);
14416+
tbl.define_column("DOWNTIME", TextTable::LEFT, TextTable::RIGHT);
14417+
tbl.define_column("NUMFAILURES", TextTable::LEFT, TextTable::RIGHT);
14418+
tbl.define_column("MTBF", TextTable::LEFT, TextTable::RIGHT);
14419+
tbl.define_column("MTTR", TextTable::LEFT, TextTable::RIGHT);
14420+
tbl.define_column("SCORE", TextTable::LEFT, TextTable::RIGHT);
14421+
tbl.define_column("AVAILABLE", TextTable::LEFT, TextTable::RIGHT);
14422+
std::map<uint64_t, PoolAvailability> pool_availability = mon.mgrstatmon()->get_pool_availability();
14423+
for (const auto& i : pool_availability) {
14424+
const auto& p = i.second;
14425+
double mtbf = p.num_failures > 0 ? (p.uptime / p.num_failures) : 0;
14426+
double mttr = p.num_failures > 0 ? (p.downtime / p.num_failures) : 0;
14427+
double score = mtbf > 0 ? mtbf / (mtbf + mttr): 1.0;
14428+
tbl << p.pool_name;
14429+
tbl << timespan_str(make_timespan(p.uptime));
14430+
tbl << timespan_str(make_timespan(p.downtime));
14431+
tbl << p.num_failures;
14432+
tbl << timespan_str(make_timespan(mtbf));
14433+
tbl << timespan_str(make_timespan(mttr));
14434+
tbl << score;
14435+
tbl << p.is_avail;
14436+
tbl << TextTable::endrow;
14437+
}
14438+
rdata.append(stringify(tbl));
1441014439
} else if (prefix == "osd force-create-pg") {
1441114440
pg_t pgid;
1441214441
string pgidstr;

0 commit comments

Comments
 (0)