Skip to content

Commit a81bc31

Browse files
jtlaytonidryomov
authored andcommitted
ceph: take the inode lock before acquiring cap refs
Most of the time, we (or the vfs layer) takes the inode_lock and then acquires caps, but ceph_read_iter does the opposite, and that can lead to a deadlock. When there are multiple clients treading over the same data, we can end up in a situation where a reader takes caps and then tries to acquire the inode_lock. Another task holds the inode_lock and issues a request to the MDS which needs to revoke the caps, but that can't happen until the inode_lock is unwedged. Fix this by having ceph_read_iter take the inode_lock earlier, before attempting to acquire caps. Fixes: 321fe13 ("ceph: add buffered/direct exclusionary locking for reads and writes") Link: https://tracker.ceph.com/issues/36348 Signed-off-by: Jeff Layton <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 31f4f5b commit a81bc31

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

fs/ceph/file.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,14 +1264,24 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
12641264
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
12651265
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
12661266

1267+
if (iocb->ki_flags & IOCB_DIRECT)
1268+
ceph_start_io_direct(inode);
1269+
else
1270+
ceph_start_io_read(inode);
1271+
12671272
if (fi->fmode & CEPH_FILE_MODE_LAZY)
12681273
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
12691274
else
12701275
want = CEPH_CAP_FILE_CACHE;
12711276
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
12721277
&got, &pinned_page);
1273-
if (ret < 0)
1278+
if (ret < 0) {
1279+
if (iocb->ki_flags & IOCB_DIRECT)
1280+
ceph_end_io_direct(inode);
1281+
else
1282+
ceph_end_io_read(inode);
12741283
return ret;
1284+
}
12751285

12761286
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
12771287
(iocb->ki_flags & IOCB_DIRECT) ||
@@ -1283,16 +1293,12 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
12831293

12841294
if (ci->i_inline_version == CEPH_INLINE_NONE) {
12851295
if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
1286-
ceph_start_io_direct(inode);
12871296
ret = ceph_direct_read_write(iocb, to,
12881297
NULL, NULL);
1289-
ceph_end_io_direct(inode);
12901298
if (ret >= 0 && ret < len)
12911299
retry_op = CHECK_EOF;
12921300
} else {
1293-
ceph_start_io_read(inode);
12941301
ret = ceph_sync_read(iocb, to, &retry_op);
1295-
ceph_end_io_read(inode);
12961302
}
12971303
} else {
12981304
retry_op = READ_INLINE;
@@ -1303,18 +1309,23 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
13031309
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
13041310
ceph_cap_string(got));
13051311
ceph_add_rw_context(fi, &rw_ctx);
1306-
ceph_start_io_read(inode);
13071312
ret = generic_file_read_iter(iocb, to);
1308-
ceph_end_io_read(inode);
13091313
ceph_del_rw_context(fi, &rw_ctx);
13101314
}
1315+
13111316
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
13121317
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
13131318
if (pinned_page) {
13141319
put_page(pinned_page);
13151320
pinned_page = NULL;
13161321
}
13171322
ceph_put_cap_refs(ci, got);
1323+
1324+
if (iocb->ki_flags & IOCB_DIRECT)
1325+
ceph_end_io_direct(inode);
1326+
else
1327+
ceph_end_io_read(inode);
1328+
13181329
if (retry_op > HAVE_RETRIED && ret >= 0) {
13191330
int statret;
13201331
struct page *page = NULL;

0 commit comments

Comments
 (0)