Skip to content

Commit f5eadff

Browse files
committed
librbd: fix mirror image status summary in a namespace
For the purposes of the summary with image counts, the "rbd mirror pool status" command is supposed to count each image only once. To this end, for unidirectional mirroring the status of the receiving site should be taken, while for bidirectional mirroring the statuses should be combined/reduced. For example, if mirroring is enabled on a single image and everything is in order, the summary is expected to be

    image health: OK
    images: 1 total
        1 replaying

on both clusters, even though on the primary the local status is MIRROR_IMAGE_STATUS_STATE_STOPPED and only on the secondary it's MIRROR_IMAGE_STATUS_STATE_REPLAYING.

Currently this isn't the case for custom namespaces. In the same scenario the primary ends up reporting

    image health: OK
    images: 1 total
        1 stopped

based solely on the local status in a namespace.

Fixes: https://tracker.ceph.com/issues/69911
Signed-off-by: Ilya Dryomov <[email protected]>
1 parent b0ef526 commit f5eadff

File tree

3 files changed

+45
-1
lines changed

3 files changed

+45
-1
lines changed

qa/workunits/rbd/rbd_mirror_bootstrap.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,28 @@ done
3535
rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format json | jq -e '.peers[0].direction == "tx-only"'
3636

3737
create_image_and_enable_mirror ${CLUSTER1} ${POOL} image1
38+
create_image_and_enable_mirror ${CLUSTER1} ${POOL}/${NS1} image1
3839

3940
wait_for_image_replay_started ${CLUSTER2} ${POOL} image1
4041
write_image ${CLUSTER1} ${POOL} image1 100
4142
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${POOL} image1
4243
wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${POOL} image1
4344

45+
POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${POOL})
46+
jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS}
47+
POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${POOL})
48+
jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS}
49+
50+
wait_for_image_replay_started ${CLUSTER2} ${POOL}/${NS1} image1
51+
write_image ${CLUSTER1} ${POOL}/${NS1} image1 100
52+
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL}/${NS1} ${POOL}/${NS1} image1
53+
wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${POOL}/${NS1} image1
54+
55+
POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${POOL}/${NS1})
56+
jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS}
57+
POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${POOL}/${NS1})
58+
jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS}
59+
4460
testlog "TEST: verify rx-tx direction"
4561
# both rx-tx peers are added immediately by "rbd mirror pool peer bootstrap import"
4662
rbd --cluster ${CLUSTER1} --pool ${PARENT_POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-tx"'
@@ -52,6 +68,9 @@ create_image ${CLUSTER2} ${PARENT_POOL} image2
5268
enable_mirror ${CLUSTER1} ${PARENT_POOL} image1
5369
enable_mirror ${CLUSTER2} ${PARENT_POOL} image2
5470

71+
create_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL}/${NS1} image1
72+
create_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL}/${NS1} image2
73+
5574
wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1
5675
write_image ${CLUSTER1} ${PARENT_POOL} image1 100
5776
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} ${PARENT_POOL} image1
@@ -62,6 +81,26 @@ write_image ${CLUSTER2} ${PARENT_POOL} image2 100
6281
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${PARENT_POOL} image2
6382
wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} image2
6483

84+
POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${PARENT_POOL})
85+
jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS}
86+
POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL})
87+
jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS}
88+
89+
wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL}/${NS1} image1
90+
write_image ${CLUSTER1} ${PARENT_POOL}/${NS1} image1 100
91+
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL}/${NS1} ${PARENT_POOL}/${NS1} image1
92+
wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${PARENT_POOL}/${NS1} image1
93+
94+
wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL}/${NS1} image2
95+
write_image ${CLUSTER2} ${PARENT_POOL}/${NS1} image2 100
96+
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL}/${NS1} ${PARENT_POOL}/${NS1} image2
97+
wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL}/${NS1} image2
98+
99+
POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${PARENT_POOL}/${NS1})
100+
jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS}
101+
POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL}/${NS1})
102+
jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS}
103+
65104
testlog "TEST: pool replayer and callout cleanup when peer is updated"
66105
test_health_state ${CLUSTER1} ${PARENT_POOL} 'OK'
67106
test_health_state ${CLUSTER2} ${PARENT_POOL} 'OK'

qa/workunits/rbd/rbd_mirror_helpers.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,11 @@ setup_pools()
299299

300300
rbd --cluster ${cluster} namespace create ${POOL}/${NS1}
301301
rbd --cluster ${cluster} namespace create ${POOL}/${NS2}
302+
rbd --cluster ${cluster} namespace create ${PARENT_POOL}/${NS1}
302303

303304
rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS1} ${MIRROR_POOL_MODE}
304305
rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS2} image
306+
rbd --cluster ${cluster} mirror pool enable ${PARENT_POOL}/${NS1} ${MIRROR_POOL_MODE}
305307

306308
if [ -z ${RBD_MIRROR_MANUAL_PEERS} ]; then
307309
if [ -z ${RBD_MIRROR_CONFIG_KEY} ]; then

src/librbd/api/Mirror.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1993,8 +1993,11 @@ int Mirror<I>::image_status_summary(librados::IoCtx& io_ctx,
19931993
MirrorImageStatusStates *states) {
19941994
CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
19951995

1996+
librados::IoCtx default_ns_io_ctx;
1997+
default_ns_io_ctx.dup(io_ctx);
1998+
default_ns_io_ctx.set_namespace("");
19961999
std::vector<cls::rbd::MirrorPeer> mirror_peers;
1997-
int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers);
2000+
int r = cls_client::mirror_peer_list(&default_ns_io_ctx, &mirror_peers);
19982001
if (r < 0 && r != -ENOENT) {
19992002
lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl;
20002003
return r;

0 commit comments

Comments
 (0)