Skip to content

Commit 9c05d3d

Browse files
librbd: make CreatePrimaryRequest remove any unlinked mirror snapshots
After commit ac552c9 ("librbd: localize snap_remove op for mirror snapshots"), rbd-mirror daemon no longer removes mirror snapshots when it's done syncing them -- instead it only unlinks from them. However, CreatePrimaryRequest state machine was not adjusted to compensate and hence two cases were missed:

- primary demotion snapshot (rbd-mirror daemon unlinks from primary demotion snapshots just like it does from regular primary snapshots); this comes up when an image is demoted but then promoted on the same cluster
- non-primary demotion snapshot (unlike regular non-primary snapshots, non-primary demotion snapshots store peer uuids and rbd-mirror daemon does unlinking just like in the case of primary snapshots); this comes up when an image is demoted and promoted on the other cluster

Related is the case of orphan snapshots. Since they are dummy to begin with, CreatePrimaryRequest would now clean up the orphan snapshot after the creation of the force promote snapshot.

Fixes: https://tracker.ceph.com/issues/61707
Co-authored-by: Christopher Hoffman <[email protected]>
Signed-off-by: Ilya Dryomov <[email protected]>
1 parent cfae3f7 commit 9c05d3d

File tree

2 files changed

+406
-51
lines changed

2 files changed

+406
-51
lines changed

src/librbd/mirror/snapshot/CreatePrimaryRequest.cc

Lines changed: 49 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -177,62 +177,69 @@ void CreatePrimaryRequest<I>::handle_refresh_image(int r) {
177177

178178
template <typename I>
179179
void CreatePrimaryRequest<I>::unlink_peer() {
180+
// TODO: Document semantics for unlink_peer
180181
uint64_t max_snapshots = m_image_ctx->config.template get_val<uint64_t>(
181182
"rbd_mirroring_max_mirroring_snapshots");
182183
ceph_assert(max_snapshots >= 3);
183184

184185
std::string peer_uuid;
185186
uint64_t snap_id = CEPH_NOSNAP;
186187

187-
for (auto &peer : m_mirror_peer_uuids) {
188+
{
188189
std::shared_lock image_locker{m_image_ctx->image_lock};
189-
size_t count = 0;
190-
uint64_t unlink_snap_id = 0;
191-
for (auto &snap_it : m_image_ctx->snap_info) {
192-
auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
193-
&snap_it.second.snap_namespace);
194-
if (info == nullptr) {
195-
continue;
196-
}
197-
if (info->state != cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
198-
// reset counters -- we count primary snapshots after the last promotion
199-
count = 0;
200-
unlink_snap_id = 0;
201-
continue;
202-
}
203-
// call UnlinkPeerRequest only if the snapshot is linked with this peer
204-
// or if it's not linked with any peer (happens if mirroring is enabled
205-
// on a pool with no peers configured or if UnlinkPeerRequest gets
206-
// interrupted)
207-
if (!info->mirror_peer_uuids.empty() &&
208-
info->mirror_peer_uuids.count(peer) == 0) {
209-
continue;
210-
}
211-
if (info->mirror_peer_uuids.empty() || !info->complete) {
212-
peer_uuid = peer;
213-
snap_id = snap_it.first;
214-
break;
215-
}
216-
count++;
217-
if (count == max_snapshots) {
218-
unlink_snap_id = snap_it.first;
219-
}
220-
if (count > max_snapshots) {
221-
peer_uuid = peer;
222-
snap_id = unlink_snap_id;
223-
break;
190+
for (const auto& peer : m_mirror_peer_uuids) {
191+
for (const auto& snap_info_pair : m_image_ctx->snap_info) {
192+
auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
193+
&snap_info_pair.second.snap_namespace);
194+
if (info == nullptr) {
195+
continue;
196+
}
197+
if (info->mirror_peer_uuids.empty() ||
198+
(info->mirror_peer_uuids.count(peer) != 0 &&
199+
info->is_primary() && !info->complete)) {
200+
peer_uuid = peer;
201+
snap_id = snap_info_pair.first;
202+
goto do_unlink;
203+
}
224204
}
225205
}
226-
if (snap_id != CEPH_NOSNAP) {
227-
break;
206+
for (const auto& peer : m_mirror_peer_uuids) {
207+
size_t count = 0;
208+
uint64_t unlink_snap_id = 0;
209+
for (const auto& snap_info_pair : m_image_ctx->snap_info) {
210+
auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
211+
&snap_info_pair.second.snap_namespace);
212+
if (info == nullptr) {
213+
continue;
214+
}
215+
if (info->state != cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
216+
// reset counters -- we count primary snapshots after the last
217+
// promotion
218+
count = 0;
219+
unlink_snap_id = 0;
220+
continue;
221+
}
222+
if (info->mirror_peer_uuids.count(peer) == 0) {
223+
// snapshot is not linked with this peer
224+
continue;
225+
}
226+
count++;
227+
if (count == max_snapshots) {
228+
unlink_snap_id = snap_info_pair.first;
229+
}
230+
if (count > max_snapshots) {
231+
peer_uuid = peer;
232+
snap_id = unlink_snap_id;
233+
goto do_unlink;
234+
}
235+
}
228236
}
229237
}
230238

231-
if (snap_id == CEPH_NOSNAP) {
232-
finish(0);
233-
return;
234-
}
239+
finish(0);
240+
return;
235241

242+
do_unlink:
236243
CephContext *cct = m_image_ctx->cct;
237244
ldout(cct, 15) << "peer=" << peer_uuid << ", snap_id=" << snap_id << dendl;
238245

0 commit comments

Comments
 (0)