Skip to content

Commit e4657cb

Browse files
committed
librbd: stop filtering async request error codes
The roots of this go back to 2015 when snap create was changed to filter EEXIST in commit 63f6c9b ("librbd: fixed snap create race conditions") and flatten respectively EINVAL in commit ef7e210 ("librbd: better handling for duplicate flatten requests"). From there this pattern made it to most other operations that can be proxied including "rbd migration execute". The motivation was to suppress generation of an "expected" error in response to a duplicate async request notification for the operation. However, doing this at the top of the handler (right before returning to the caller) and for an error as generic as EINVAL is super fragile. It's trivial for an error that is being filtered to sneak in with a lower level change completely unnoticed. For example, live migration recently added NBD stream which is implemented on top of libnbd and it turns out that some libnbd APIs return EINVAL on various occasions when the NBD endpoint disappears and an error like ENOTCONN would make more sense. If this occurs during "rbd migration execute" operation, the rest of librbd never learns that migration was disrupted and the image is transitioned to MIGRATION_STATE_EXECUTED, thus handing a partially imported (read: corrupted) image to the user. Luckily, with commits 07fbc4b ("librbd: track complete async operation requests") and 96bc204 ("librbd: track complete async operation return code"), the scenario which originally prompted error code filtering isn't an issue anymore. Despite a few shortcomings (e.g. when an async request notification is acked with result 0, it's impossible to tell whether a) a new operation was kicked off, b) there is an operation that is still in progress or c) it's for an operation that completed earlier but hasn't "expired" yet), even just commit 07fbc4b by itself prevents a duplicate notification from kicking off a second operation that could generate an error for something that actually succeeded. With that in mind, eradicate error code filtering from Operations class. Fixes: https://tracker.ceph.com/issues/58185 Signed-off-by: Ilya Dryomov <[email protected]>
1 parent bceddc4 commit e4657cb

File tree

1 file changed

+15
-53
lines changed

1 file changed

+15
-53
lines changed

src/librbd/Operations.cc

Lines changed: 15 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ struct C_InvokeAsyncRequest : public Context {
185185
bool permit_snapshot;
186186
boost::function<void(Context*)> local;
187187
boost::function<void(Context*)> remote;
188-
std::set<int> filter_error_codes;
189188
Context *on_finish;
190189
bool request_lock = false;
191190

@@ -194,11 +193,10 @@ struct C_InvokeAsyncRequest : public Context {
194193
bool permit_snapshot,
195194
const boost::function<void(Context*)>& local,
196195
const boost::function<void(Context*)>& remote,
197-
const std::set<int> &filter_error_codes,
198196
Context *on_finish)
199197
: image_ctx(image_ctx), operation(operation), request_type(request_type),
200198
permit_snapshot(permit_snapshot), local(local), remote(remote),
201-
filter_error_codes(filter_error_codes), on_finish(on_finish) {
199+
on_finish(on_finish) {
202200
}
203201

204202
void send() {
@@ -382,9 +380,6 @@ struct C_InvokeAsyncRequest : public Context {
382380
}
383381

384382
void finish(int r) override {
385-
if (filter_error_codes.count(r) != 0) {
386-
r = 0;
387-
}
388383
on_finish->complete(r);
389384
}
390385
};
@@ -503,11 +498,8 @@ int Operations<I>::flatten(ProgressContext &prog_ctx) {
503498
m_image_ctx.image_watcher, request_id,
504499
boost::ref(prog_ctx), _1));
505500

506-
if (r < 0 && r != -EINVAL) {
507-
return r;
508-
}
509501
ldout(cct, 20) << "flatten finished" << dendl;
510-
return 0;
502+
return r;
511503
}
512504

513505
template <typename I>
@@ -582,10 +574,7 @@ int Operations<I>::rebuild_object_map(ProgressContext &prog_ctx) {
582574
boost::ref(prog_ctx), _1));
583575

584576
ldout(cct, 10) << "rebuild object map finished" << dendl;
585-
if (r < 0) {
586-
return r;
587-
}
588-
return 0;
577+
return r;
589578
}
590579

591580
template <typename I>
@@ -686,12 +675,9 @@ int Operations<I>::rename(const char *dstname) {
686675
boost::bind(&ImageWatcher<I>::notify_rename,
687676
m_image_ctx.image_watcher, request_id,
688677
dstname, _1));
689-
if (r < 0 && r != -EEXIST) {
690-
return r;
691-
}
692678

693679
m_image_ctx.set_image_name(dstname);
694-
return 0;
680+
return r;
695681
}
696682

697683
template <typename I>
@@ -874,7 +860,7 @@ void Operations<I>::snap_create(const cls::rbd::SnapshotNamespace &snap_namespac
874860
boost::bind(&ImageWatcher<I>::notify_snap_create, m_image_ctx.image_watcher,
875861
request_id, snap_namespace, snap_name, flags,
876862
boost::ref(prog_ctx), _1),
877-
{-EEXIST}, on_finish);
863+
on_finish);
878864
req->send();
879865
}
880866

@@ -1077,7 +1063,7 @@ void Operations<I>::snap_remove(const cls::rbd::SnapshotNamespace& snap_namespac
10771063
boost::bind(&ImageWatcher<I>::notify_snap_remove,
10781064
m_image_ctx.image_watcher, request_id, snap_namespace,
10791065
snap_name, _1),
1080-
{-ENOENT}, on_finish);
1066+
on_finish);
10811067
req->send();
10821068
} else {
10831069
std::shared_lock owner_lock{m_image_ctx.owner_lock};
@@ -1173,9 +1159,6 @@ int Operations<I>::snap_rename(const char *srcname, const char *dstname) {
11731159
boost::bind(&ImageWatcher<I>::notify_snap_rename,
11741160
m_image_ctx.image_watcher, request_id,
11751161
snap_id, dstname, _1));
1176-
if (r < 0 && r != -EEXIST) {
1177-
return r;
1178-
}
11791162
} else {
11801163
C_SaferCond cond_ctx;
11811164
{
@@ -1184,13 +1167,10 @@ int Operations<I>::snap_rename(const char *srcname, const char *dstname) {
11841167
}
11851168

11861169
r = cond_ctx.wait();
1187-
if (r < 0) {
1188-
return r;
1189-
}
11901170
}
11911171

11921172
m_image_ctx.perfcounter->inc(l_librbd_snap_rename);
1193-
return 0;
1173+
return r;
11941174
}
11951175

11961176
template <typename I>
@@ -1275,9 +1255,6 @@ int Operations<I>::snap_protect(const cls::rbd::SnapshotNamespace& snap_namespac
12751255
boost::bind(&ImageWatcher<I>::notify_snap_protect,
12761256
m_image_ctx.image_watcher, request_id,
12771257
snap_namespace, snap_name, _1));
1278-
if (r < 0 && r != -EBUSY) {
1279-
return r;
1280-
}
12811258
} else {
12821259
C_SaferCond cond_ctx;
12831260
{
@@ -1286,11 +1263,9 @@ int Operations<I>::snap_protect(const cls::rbd::SnapshotNamespace& snap_namespac
12861263
}
12871264

12881265
r = cond_ctx.wait();
1289-
if (r < 0) {
1290-
return r;
1291-
}
12921266
}
1293-
return 0;
1267+
1268+
return r;
12941269
}
12951270

12961271
template <typename I>
@@ -1373,9 +1348,6 @@ int Operations<I>::snap_unprotect(const cls::rbd::SnapshotNamespace& snap_namesp
13731348
boost::bind(&ImageWatcher<I>::notify_snap_unprotect,
13741349
m_image_ctx.image_watcher, request_id,
13751350
snap_namespace, snap_name, _1));
1376-
if (r < 0 && r != -EINVAL) {
1377-
return r;
1378-
}
13791351
} else {
13801352
C_SaferCond cond_ctx;
13811353
{
@@ -1384,11 +1356,9 @@ int Operations<I>::snap_unprotect(const cls::rbd::SnapshotNamespace& snap_namesp
13841356
}
13851357

13861358
r = cond_ctx.wait();
1387-
if (r < 0) {
1388-
return r;
1389-
}
13901359
}
1391-
return 0;
1360+
1361+
return r;
13921362
}
13931363

13941364
template <typename I>
@@ -1709,9 +1679,6 @@ int Operations<I>::metadata_remove(const std::string &key) {
17091679
boost::bind(&ImageWatcher<I>::notify_metadata_remove,
17101680
m_image_ctx.image_watcher, request_id,
17111681
key, _1));
1712-
if (r == -ENOENT) {
1713-
r = 0;
1714-
}
17151682

17161683
std::string config_key;
17171684
if (util::is_metadata_config_override(key, &config_key) && r >= 0) {
@@ -1775,11 +1742,8 @@ int Operations<I>::migrate(ProgressContext &prog_ctx) {
17751742
m_image_ctx.image_watcher, request_id,
17761743
boost::ref(prog_ctx), _1));
17771744

1778-
if (r < 0 && r != -EINVAL) {
1779-
return r;
1780-
}
17811745
ldout(cct, 20) << "migrate finished" << dendl;
1782-
return 0;
1746+
return r;
17831747
}
17841748

17851749
template <typename I>
@@ -1842,11 +1806,9 @@ int Operations<I>::sparsify(size_t sparse_size, ProgressContext &prog_ctx) {
18421806
m_image_ctx.image_watcher,
18431807
request_id, sparse_size,
18441808
boost::ref(prog_ctx), _1));
1845-
if (r < 0 && r != -EINVAL) {
1846-
return r;
1847-
}
1809+
18481810
ldout(cct, 20) << "resparsify finished" << dendl;
1849-
return 0;
1811+
return r;
18501812
}
18511813

18521814
template <typename I>
@@ -1934,7 +1896,7 @@ int Operations<I>::invoke_async_request(
19341896
permit_snapshot,
19351897
local_request,
19361898
remote_request,
1937-
{}, &ctx);
1899+
&ctx);
19381900
req->send();
19391901
return ctx.wait();
19401902
}

0 commit comments

Comments
 (0)