Skip to content

Commit 1980b1b

Browse files
lxbsz authored and idryomov committed
ceph: stop forwarding the request when exceeding 256 times
The type of 'num_fwd' in ceph 'MClientRequestForward' is 'int32_t', while in 'ceph_mds_request_head' the type is '__u8'. So in case the request bounces between MDSes more than 256 times, the client will get stuck. In this case it's usually a bug in the MDS, and continuing to bounce the request makes no sense. URL: https://tracker.ceph.com/issues/55130 Signed-off-by: Xiubo Li <[email protected]> Reviewed-by: Jeff Layton <[email protected]> Reviewed-by: Luís Henriques <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 6c1dc50 commit 1980b1b

File tree

1 file changed

+34
-5
lines changed

1 file changed

+34
-5
lines changed

fs/ceph/mds_client.c

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3265,6 +3265,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
32653265
int err = -EINVAL;
32663266
void *p = msg->front.iov_base;
32673267
void *end = p + msg->front.iov_len;
3268+
bool aborted = false;
32683269

32693270
ceph_decode_need(&p, end, 2*sizeof(u32), bad);
32703271
next_mds = ceph_decode_32(&p);
@@ -3273,16 +3274,41 @@ static void handle_forward(struct ceph_mds_client *mdsc,
32733274
mutex_lock(&mdsc->mutex);
32743275
req = lookup_get_request(mdsc, tid);
32753276
if (!req) {
3277+
mutex_unlock(&mdsc->mutex);
32763278
dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
3277-
goto out; /* dup reply? */
3279+
return; /* dup reply? */
32783280
}
32793281

32803282
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
32813283
dout("forward tid %llu aborted, unregistering\n", tid);
32823284
__unregister_request(mdsc, req);
32833285
} else if (fwd_seq <= req->r_num_fwd) {
3284-
dout("forward tid %llu to mds%d - old seq %d <= %d\n",
3285-
tid, next_mds, req->r_num_fwd, fwd_seq);
3286+
/*
3287+
* The type of 'num_fwd' in ceph 'MClientRequestForward'
3288+
* is 'int32_t', while in 'ceph_mds_request_head' the
3289+
* type is '__u8'. So in case the request bounces between
3290+
* MDSes exceeding 256 times, the client will get stuck.
3291+
*
3292+
* In this case it's usually a bug in MDS and continue
3293+
* bouncing the request makes no sense.
3294+
*
3295+
* In future this could be fixed in ceph code, so avoid
3296+
* using the hardcode here.
3297+
*/
3298+
int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
3299+
max = 1 << (max * BITS_PER_BYTE);
3300+
if (req->r_num_fwd >= max) {
3301+
mutex_lock(&req->r_fill_mutex);
3302+
req->r_err = -EMULTIHOP;
3303+
set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
3304+
mutex_unlock(&req->r_fill_mutex);
3305+
aborted = true;
3306+
pr_warn_ratelimited("forward tid %llu seq overflow\n",
3307+
tid);
3308+
} else {
3309+
dout("forward tid %llu to mds%d - old seq %d <= %d\n",
3310+
tid, next_mds, req->r_num_fwd, fwd_seq);
3311+
}
32863312
} else {
32873313
/* resend. forward race not possible; mds would drop */
32883314
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
@@ -3294,9 +3320,12 @@ static void handle_forward(struct ceph_mds_client *mdsc,
32943320
put_request_session(req);
32953321
__do_request(mdsc, req);
32963322
}
3297-
ceph_mdsc_put_request(req);
3298-
out:
32993323
mutex_unlock(&mdsc->mutex);
3324+
3325+
/* kick calling process */
3326+
if (aborted)
3327+
complete_request(mdsc, req);
3328+
ceph_mdsc_put_request(req);
33003329
return;
33013330

33023331
bad:

0 commit comments

Comments
 (0)