Skip to content

Commit 0a046b6

Browse files
authored
Merge pull request ceph#53251 from idryomov/wip-61707
librbd: make CreatePrimaryRequest remove any unlinked mirror snapshots Reviewed-by: Ramana Raja <[email protected]>
2 parents 8d5f550 + 153df2d commit 0a046b6

File tree

7 files changed

+520
-59
lines changed

7 files changed

+520
-59
lines changed

qa/workunits/rbd/rbd_mirror_helpers.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,6 +1169,16 @@ wait_for_snap_removed_from_trash()
11691169
return 1
11701170
}
11711171

1172+
count_mirror_snaps()
1173+
{
1174+
local cluster=$1
1175+
local pool=$2
1176+
local image=$3
1177+
1178+
rbd --cluster ${cluster} snap ls ${pool}/${image} --all |
1179+
grep -c -F " mirror ("
1180+
}
1181+
11721182
write_image()
11731183
{
11741184
local cluster=$1

qa/workunits/rbd/rbd_mirror_journal.sh

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,29 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary
214214
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
215215
compare_images ${POOL} ${image}
216216

217-
# force promote
217+
testlog "TEST: failover / failback loop"
218+
for i in `seq 1 20`; do
219+
demote_image ${CLUSTER2} ${POOL} ${image}
220+
wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
221+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
222+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
223+
promote_image ${CLUSTER1} ${POOL} ${image}
224+
wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image}
225+
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image}
226+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
227+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying'
228+
demote_image ${CLUSTER1} ${POOL} ${image}
229+
wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image}
230+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
231+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
232+
promote_image ${CLUSTER2} ${POOL} ${image}
233+
wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
234+
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
235+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
236+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
237+
done
238+
239+
testlog "TEST: force promote"
218240
force_promote_image=test_force_promote
219241
create_image ${CLUSTER2} ${POOL} ${force_promote_image}
220242
write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100

qa/workunits/rbd/rbd_mirror_snapshot.sh

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,32 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
220220
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
221221
compare_images ${POOL} ${image}
222222

223-
# force promote
223+
testlog "TEST: failover / failback loop"
224+
for i in `seq 1 20`; do
225+
demote_image ${CLUSTER2} ${POOL} ${image}
226+
wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
227+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
228+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
229+
promote_image ${CLUSTER1} ${POOL} ${image}
230+
wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image}
231+
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image}
232+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
233+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying'
234+
demote_image ${CLUSTER1} ${POOL} ${image}
235+
wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image}
236+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown'
237+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown'
238+
promote_image ${CLUSTER2} ${POOL} ${image}
239+
wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
240+
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
241+
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
242+
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
243+
done
244+
# check that demote (or other mirror snapshots) don't pile up
245+
test "$(count_mirror_snaps ${CLUSTER1} ${POOL} ${image})" -le 3
246+
test "$(count_mirror_snaps ${CLUSTER2} ${POOL} ${image})" -le 3
247+
248+
testlog "TEST: force promote"
224249
force_promote_image=test_force_promote
225250
create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image}
226251
write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100

src/librbd/mirror/snapshot/CreatePrimaryRequest.cc

Lines changed: 49 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -177,62 +177,69 @@ void CreatePrimaryRequest<I>::handle_refresh_image(int r) {
177177

178178
template <typename I>
179179
void CreatePrimaryRequest<I>::unlink_peer() {
180+
// TODO: Document semantics for unlink_peer
180181
uint64_t max_snapshots = m_image_ctx->config.template get_val<uint64_t>(
181182
"rbd_mirroring_max_mirroring_snapshots");
182183
ceph_assert(max_snapshots >= 3);
183184

184185
std::string peer_uuid;
185186
uint64_t snap_id = CEPH_NOSNAP;
186187

187-
for (auto &peer : m_mirror_peer_uuids) {
188+
{
188189
std::shared_lock image_locker{m_image_ctx->image_lock};
189-
size_t count = 0;
190-
uint64_t unlink_snap_id = 0;
191-
for (auto &snap_it : m_image_ctx->snap_info) {
192-
auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
193-
&snap_it.second.snap_namespace);
194-
if (info == nullptr) {
195-
continue;
196-
}
197-
if (info->state != cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
198-
// reset counters -- we count primary snapshots after the last promotion
199-
count = 0;
200-
unlink_snap_id = 0;
201-
continue;
202-
}
203-
// call UnlinkPeerRequest only if the snapshot is linked with this peer
204-
// or if it's not linked with any peer (happens if mirroring is enabled
205-
// on a pool with no peers configured or if UnlinkPeerRequest gets
206-
// interrupted)
207-
if (!info->mirror_peer_uuids.empty() &&
208-
info->mirror_peer_uuids.count(peer) == 0) {
209-
continue;
210-
}
211-
if (info->mirror_peer_uuids.empty() || !info->complete) {
212-
peer_uuid = peer;
213-
snap_id = snap_it.first;
214-
break;
215-
}
216-
count++;
217-
if (count == max_snapshots) {
218-
unlink_snap_id = snap_it.first;
219-
}
220-
if (count > max_snapshots) {
221-
peer_uuid = peer;
222-
snap_id = unlink_snap_id;
223-
break;
190+
for (const auto& peer : m_mirror_peer_uuids) {
191+
for (const auto& snap_info_pair : m_image_ctx->snap_info) {
192+
auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
193+
&snap_info_pair.second.snap_namespace);
194+
if (info == nullptr) {
195+
continue;
196+
}
197+
if (info->mirror_peer_uuids.empty() ||
198+
(info->mirror_peer_uuids.count(peer) != 0 &&
199+
info->is_primary() && !info->complete)) {
200+
peer_uuid = peer;
201+
snap_id = snap_info_pair.first;
202+
goto do_unlink;
203+
}
224204
}
225205
}
226-
if (snap_id != CEPH_NOSNAP) {
227-
break;
206+
for (const auto& peer : m_mirror_peer_uuids) {
207+
size_t count = 0;
208+
uint64_t unlink_snap_id = 0;
209+
for (const auto& snap_info_pair : m_image_ctx->snap_info) {
210+
auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
211+
&snap_info_pair.second.snap_namespace);
212+
if (info == nullptr) {
213+
continue;
214+
}
215+
if (info->state != cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
216+
// reset counters -- we count primary snapshots after the last
217+
// promotion
218+
count = 0;
219+
unlink_snap_id = 0;
220+
continue;
221+
}
222+
if (info->mirror_peer_uuids.count(peer) == 0) {
223+
// snapshot is not linked with this peer
224+
continue;
225+
}
226+
count++;
227+
if (count == max_snapshots) {
228+
unlink_snap_id = snap_info_pair.first;
229+
}
230+
if (count > max_snapshots) {
231+
peer_uuid = peer;
232+
snap_id = unlink_snap_id;
233+
goto do_unlink;
234+
}
235+
}
228236
}
229237
}
230238

231-
if (snap_id == CEPH_NOSNAP) {
232-
finish(0);
233-
return;
234-
}
239+
finish(0);
240+
return;
235241

242+
do_unlink:
236243
CephContext *cct = m_image_ctx->cct;
237244
ldout(cct, 15) << "peer=" << peer_uuid << ", snap_id=" << snap_id << dendl;
238245

src/librbd/operation/SnapshotRemoveRequest.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,10 @@ void SnapshotRemoveRequest<I>::handle_remove_object_map(int r) {
355355
template <typename I>
356356
void SnapshotRemoveRequest<I>::remove_image_state() {
357357
I &image_ctx = this->m_image_ctx;
358-
auto type = cls::rbd::get_snap_namespace_type(m_snap_namespace);
359358

360-
if (type != cls::rbd::SNAPSHOT_NAMESPACE_TYPE_MIRROR) {
359+
const auto* info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
360+
&m_snap_namespace);
361+
if (info == nullptr || info->is_orphan()) {
361362
release_snap_id();
362363
return;
363364
}

0 commit comments

Comments
 (0)