Skip to content

Commit d315e3d

Browse files
Merge pull request ceph#63552 from shraddhaag/wip-shraddhaag-availability-3
mon: add command osd pool clear-availability-status
2 parents 19b19a4 + ba852da commit d315e3d

File tree

7 files changed

+82
-4
lines changed

7 files changed

+82
-4
lines changed

PendingReleaseNotes

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,14 @@
147147
`s3:GetObjectRetention` are also considered when fetching the source object.
148148
Replication of tags is controlled by the `s3:GetObject(Version)Tagging` permission.
149149

150-
* RADOS: A new command, `ceph osd pool availability-status`, has been added that allows
150+
* RADOS: A new command, ``ceph osd pool availability-status``, has been added that allows
151151
users to view the availability score for each pool in a cluster. A pool is considered
152152
unavailable if any PG in the pool is not in active state or if there are unfound
153153
objects. Otherwise the pool is considered available. The score is updated every
154-
5 seconds. The feature is on by default. A new config option `enable_availability_tracking`
155-
can be used to turn off the feature if required. This feature is in tech preview.
154+
5 seconds. The feature is on by default. A new config option ``enable_availability_tracking``
155+
can be used to turn off the feature if required. Another command is added to clear the
156+
availability status for a specific pool, ``ceph osd pool clear-availability-status <pool-name>``.
157+
This feature is in tech preview.
156158
Related trackers:
157159
- https://tracker.ceph.com/issues/67777
158160

doc/rados/operations/monitoring.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,4 +794,13 @@ downtime, the ``enable_availability_tracking`` config option can be set to ``fal
794794
ceph config set mon enable_availability_tracking false
795795

796796
While the feature is turned off, the last calculated score will be preserved. The
797-
score will again start updating once the feature is turned on again.
797+
score will again start updating once the feature is turned on again.
798+
799+
It's also possible to clear the data availability score for a specific
800+
pool if needed with a command of the following form:
801+
802+
.. prompt:: bash $
803+
804+
ceph osd pool clear-availability-status <pool-name>
805+
806+
Note: Clearing a score is not allowed if the feature itself is disabled.

qa/standalone/mon/availability.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ function TEST_availablity_score() {
7373
fi
7474
sleep 120
7575

76+
# try clearing availability score: should fail as feature is disabled
77+
CLEAR_SCORE_RESPONSE=$(ceph osd pool clear-availability-status foo)
78+
if [ "$CLEAR_SCORE_RESPONSE" != "" ]; then
79+
echo "Failed: score clear attempted when feature is disabled"
80+
return 1
81+
fi
82+
7683
# enable feature and check whether the score was updated while it was off
7784
ceph config set mon enable_availability_tracking true
7885
AVAILABILITY_STATUS=$(ceph osd pool availability-status | grep -w "foo")
@@ -125,7 +132,19 @@ function TEST_availablity_score() {
125132
echo "Failed: Availability score for the pool did not drop"
126133
return 1
127134
fi
135+
UPTIME_DURATION=$(echo "$AVAILABILITY_STATUS" | awk '{print $2}')
136+
UPTIME_SECONDS=$(( ${UPTIME_DURATION%[sm]} * (${UPTIME_DURATION: -1} == "m" ? 60 : 1) ))
128137

138+
# reset availability score for pool foo
139+
ceph osd pool clear-availability-status foo
140+
AVAILABILITY_STATUS=$(ceph osd pool availability-status | grep -w "foo")
141+
NEW_UPTIME_DURATION=$(echo "$AVAILABILITY_STATUS" | awk '{print $2}')
142+
# Convert the post-clear uptime (a number with an "s" or "m" suffix) to seconds.
# The suffix is tested with [ ] because inside $(( )) the letters would be read as variable names.
NEW_UPTIME_SECONDS=$(( ${NEW_UPTIME_DURATION%[sm]} * $([ "${NEW_UPTIME_DURATION: -1}" = "m" ] && echo 60 || echo 1) ))
143+
if [ "$NEW_UPTIME_SECONDS" -gt "$UPTIME_SECONDS" ]; then
144+
echo "Failed: Availability score for the pool did not drop after clearing"
145+
return 1
146+
fi
147+
129148
echo "TEST PASSED"
130149
return 0
131150
}

src/mon/MgrStatMonitor.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,20 @@ void MgrStatMonitor::create_initial()
101101
encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL);
102102
}
103103

104+
void MgrStatMonitor::clear_pool_availability(int64_t poolid)
105+
{
106+
dout(20) << __func__ << dendl;
107+
std::scoped_lock l(lock);
108+
auto pool_itr = pending_pool_availability.find(poolid);
109+
if (pool_itr != pending_pool_availability.end()) {
110+
pool_itr->second = PoolAvailability();
111+
} else {
112+
dout(1) << "failed to clear a non-existing pool: " << poolid << dendl;
113+
return;
114+
};
115+
dout(20) << __func__ << " cleared availability score for pool: " << poolid << dendl;
116+
}
117+
104118
void MgrStatMonitor::calc_pool_availability()
105119
{
106120
dout(20) << __func__ << dendl;
@@ -398,6 +412,7 @@ bool MgrStatMonitor::prepare_report(MonOpRequestRef op)
398412
dout(20) << "pool_availability:\n";
399413
JSONFormatter jf(true);
400414
jf.open_object_section("pool_availability");
415+
std::scoped_lock l(lock);
401416
for (auto& i : pending_pool_availability) {
402417
jf.dump_object(std::to_string(i.first), i.second);
403418
}

src/mon/MgrStatMonitor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
bool enable_availability_tracking = g_conf().get_val<bool>("enable_availability_tracking"); ///< tracking availability score feature
5959
std::optional<utime_t> reset_availability_last_uptime_downtime_val;
6060

61+
void clear_pool_availability(int64_t poolid);
62+
6163
void check_sub(Subscription *sub);
6264
void check_subs();
6365
void send_digests();

src/mon/MonCommands.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,6 +1246,10 @@ COMMAND("osd pool stretch unset "
12461246
COMMAND("osd pool availability-status", \
12471247
"obtain availability stats from all pools", \
12481248
"osd", "r")
1249+
COMMAND("osd pool clear-availability-status "
1250+
"name=pool,type=CephPoolname ",
1251+
"clear a pool's existing availability stats",
1252+
"osd", "r")
12491253
COMMAND("osd utilization",
12501254
"get basic pg distribution stats",
12511255
"osd", "r")

src/mon/OSDMonitor.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14406,6 +14406,33 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
1440614406
wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs,
1440714407
get_last_committed() + 1));
1440814408
return true;
14409+
} else if (prefix == "osd pool clear-availability-status") {
14410+
if (!g_conf().get_val<bool>("enable_availability_tracking")) {
14411+
ss << "Availability tracking is disabled. Availability status can not be cleared "
14412+
<< "while the feature is disabled. Enable it by setting the config "
14413+
<< "option enable_availability_tracking to ``true`` then try again.";
14414+
err = -EOPNOTSUPP;
14415+
goto reply_no_propose;
14416+
}
14417+
14418+
string pool_name;
14419+
cmd_getval(cmdmap, "pool", pool_name);
14420+
int64_t pool_id = osdmap.lookup_pg_pool_name(pool_name);
14421+
// check if pool exists
14422+
if (pool_id < 0) {
14423+
ss << "unrecognized pool '" << pool_name << "'";
14424+
err = -ENOENT;
14425+
goto reply_no_propose;
14426+
}
14427+
std::map<uint64_t, PoolAvailability> pool_availability = mon.mgrstatmon()->get_pool_availability();
14428+
// check if pool exists in pool_availability
14429+
if (pool_availability.find(pool_id) == pool_availability.end()){
14430+
ss << "unrecognized pool '" << pool_name << "'";
14431+
err = -ENOENT;
14432+
goto reply_no_propose;
14433+
}
14434+
// clear existing calculations
14435+
mon.mgrstatmon()->clear_pool_availability(pool_id);
1440914436
} else if (prefix == "osd pool availability-status") {
1441014437
if (!g_conf().get_val<bool>("enable_availability_tracking")) {
1441114438
ss << "availability tracking is disabled; you can enable it by setting the config option enable_availability_tracking";

0 commit comments

Comments
 (0)