Skip to content

Commit 5cd64d4

Browse files
committed
Merge tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:

"A fix for a race condition around r_parent tracking that took a long time to track down from Alex and some fixes for potential crashes on accessing invalid memory from Max and myself. All marked for stable"

* tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client:
  - libceph: fix invalid accesses to ceph_connection_v1_info
  - ceph: fix crash after fscrypt_encrypt_pagecache_blocks() error
  - ceph: always call ceph_shift_unused_folios_left()
  - ceph: fix race condition where r_parent becomes stale before sending message
  - ceph: fix race condition validating r_parent before applying state
2 parents 395d68e + cdbc983 commit 5cd64d4

File tree

8 files changed

+223
-126
lines changed

8 files changed

+223
-126
lines changed

fs/ceph/addr.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,7 +1264,9 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping,
12641264
0,
12651265
gfp_flags);
12661266
if (IS_ERR(pages[index])) {
1267-
if (PTR_ERR(pages[index]) == -EINVAL) {
1267+
int err = PTR_ERR(pages[index]);
1268+
1269+
if (err == -EINVAL) {
12681270
pr_err_client(cl, "inode->i_blkbits=%hhu\n",
12691271
inode->i_blkbits);
12701272
}
@@ -1273,7 +1275,7 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping,
12731275
BUG_ON(ceph_wbc->locked_pages == 0);
12741276

12751277
pages[index] = NULL;
1276-
return PTR_ERR(pages[index]);
1278+
return err;
12771279
}
12781280
} else {
12791281
pages[index] = &folio->page;
@@ -1687,6 +1689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
16871689

16881690
process_folio_batch:
16891691
rc = ceph_process_folio_batch(mapping, wbc, &ceph_wbc);
1692+
ceph_shift_unused_folios_left(&ceph_wbc.fbatch);
16901693
if (rc)
16911694
goto release_folios;
16921695

@@ -1695,8 +1698,6 @@ static int ceph_writepages_start(struct address_space *mapping,
16951698
goto release_folios;
16961699

16971700
if (ceph_wbc.processed_in_fbatch) {
1698-
ceph_shift_unused_folios_left(&ceph_wbc.fbatch);
1699-
17001701
if (folio_batch_count(&ceph_wbc.fbatch) == 0 &&
17011702
ceph_wbc.locked_pages < ceph_wbc.max_pages) {
17021703
doutc(cl, "reached end fbatch, trying for more\n");

fs/ceph/debugfs.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p)
5555
struct ceph_mds_client *mdsc = fsc->mdsc;
5656
struct ceph_mds_request *req;
5757
struct rb_node *rp;
58-
int pathlen = 0;
59-
u64 pathbase;
6058
char *path;
6159

6260
mutex_lock(&mdsc->mutex);
@@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s, void *p)
8179
if (req->r_inode) {
8280
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
8381
} else if (req->r_dentry) {
84-
path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
85-
&pathbase, 0);
82+
struct ceph_path_info path_info;
83+
path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
8684
if (IS_ERR(path))
8785
path = NULL;
8886
spin_lock(&req->r_dentry->d_lock);
@@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s, void *p)
9189
req->r_dentry,
9290
path ? path : "");
9391
spin_unlock(&req->r_dentry->d_lock);
94-
ceph_mdsc_free_path(path, pathlen);
92+
ceph_mdsc_free_path_info(&path_info);
9593
} else if (req->r_path1) {
9694
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
9795
req->r_path1);
@@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s, void *p)
10098
}
10199

102100
if (req->r_old_dentry) {
103-
path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen,
104-
&pathbase, 0);
101+
struct ceph_path_info path_info;
102+
path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0);
105103
if (IS_ERR(path))
106104
path = NULL;
107105
spin_lock(&req->r_old_dentry->d_lock);
@@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s, void *p)
111109
req->r_old_dentry,
112110
path ? path : "");
113111
spin_unlock(&req->r_old_dentry->d_lock);
114-
ceph_mdsc_free_path(path, pathlen);
112+
ceph_mdsc_free_path_info(&path_info);
115113
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
116114
if (req->r_ino2.ino)
117115
seq_printf(s, " #%llx/%s", req->r_ino2.ino,

fs/ceph/dir.c

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,10 +1271,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
12711271

12721272
/* If op failed, mark everyone involved for errors */
12731273
if (result) {
1274-
int pathlen = 0;
1275-
u64 base = 0;
1276-
char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen,
1277-
&base, 0);
1274+
struct ceph_path_info path_info = {0};
1275+
char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
12781276

12791277
/* mark error on parent + clear complete */
12801278
mapping_set_error(req->r_parent->i_mapping, result);
@@ -1288,8 +1286,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
12881286
mapping_set_error(req->r_old_inode->i_mapping, result);
12891287

12901288
pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n",
1291-
base, IS_ERR(path) ? "<<bad>>" : path, result);
1292-
ceph_mdsc_free_path(path, pathlen);
1289+
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
1290+
ceph_mdsc_free_path_info(&path_info);
12931291
}
12941292
out:
12951293
iput(req->r_old_inode);
@@ -1347,8 +1345,6 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
13471345
int err = -EROFS;
13481346
int op;
13491347
char *path;
1350-
int pathlen;
1351-
u64 pathbase;
13521348

13531349
if (ceph_snap(dir) == CEPH_SNAPDIR) {
13541350
/* rmdir .snap/foo is RMSNAP */
@@ -1367,14 +1363,15 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
13671363
if (!dn) {
13681364
try_async = false;
13691365
} else {
1370-
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
1366+
struct ceph_path_info path_info;
1367+
path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
13711368
if (IS_ERR(path)) {
13721369
try_async = false;
13731370
err = 0;
13741371
} else {
13751372
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
13761373
}
1377-
ceph_mdsc_free_path(path, pathlen);
1374+
ceph_mdsc_free_path_info(&path_info);
13781375
dput(dn);
13791376

13801377
/* For none EACCES cases will let the MDS do the mds auth check */

fs/ceph/file.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,6 @@ int ceph_open(struct inode *inode, struct file *file)
368368
int flags, fmode, wanted;
369369
struct dentry *dentry;
370370
char *path;
371-
int pathlen;
372-
u64 pathbase;
373371
bool do_sync = false;
374372
int mask = MAY_READ;
375373

@@ -399,14 +397,15 @@ int ceph_open(struct inode *inode, struct file *file)
399397
if (!dentry) {
400398
do_sync = true;
401399
} else {
402-
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
400+
struct ceph_path_info path_info;
401+
path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
403402
if (IS_ERR(path)) {
404403
do_sync = true;
405404
err = 0;
406405
} else {
407406
err = ceph_mds_check_access(mdsc, path, mask);
408407
}
409-
ceph_mdsc_free_path(path, pathlen);
408+
ceph_mdsc_free_path_info(&path_info);
410409
dput(dentry);
411410

412411
/* For none EACCES cases will let the MDS do the mds auth check */
@@ -614,15 +613,13 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
614613
mapping_set_error(req->r_parent->i_mapping, result);
615614

616615
if (result) {
617-
int pathlen = 0;
618-
u64 base = 0;
619-
char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
620-
&base, 0);
616+
struct ceph_path_info path_info = {0};
617+
char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
621618

622619
pr_warn_client(cl,
623620
"async create failure path=(%llx)%s result=%d!\n",
624-
base, IS_ERR(path) ? "<<bad>>" : path, result);
625-
ceph_mdsc_free_path(path, pathlen);
621+
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
622+
ceph_mdsc_free_path_info(&path_info);
626623

627624
ceph_dir_clear_complete(req->r_parent);
628625
if (!d_unhashed(dentry))
@@ -791,8 +788,6 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
791788
int mask;
792789
int err;
793790
char *path;
794-
int pathlen;
795-
u64 pathbase;
796791

797792
doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
798793
dir, ceph_vinop(dir), dentry, dentry,
@@ -814,7 +809,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
814809
if (!dn) {
815810
try_async = false;
816811
} else {
817-
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
812+
struct ceph_path_info path_info;
813+
path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
818814
if (IS_ERR(path)) {
819815
try_async = false;
820816
err = 0;
@@ -826,7 +822,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
826822
mask |= MAY_WRITE;
827823
err = ceph_mds_check_access(mdsc, path, mask);
828824
}
829-
ceph_mdsc_free_path(path, pathlen);
825+
ceph_mdsc_free_path_info(&path_info);
830826
dput(dn);
831827

832828
/* For none EACCES cases will let the MDS do the mds auth check */

fs/ceph/inode.c

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
5555
return 0;
5656
}
5757

58+
/*
59+
* Check if the parent inode matches the vino from directory reply info
60+
*/
61+
static inline bool ceph_vino_matches_parent(struct inode *parent,
62+
struct ceph_vino vino)
63+
{
64+
return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap;
65+
}
66+
67+
/*
68+
* Validate that the directory inode referenced by @req->r_parent matches the
69+
* inode number and snapshot id contained in the reply's directory record. If
70+
* they do not match – which can theoretically happen if the parent dentry was
71+
* moved between the time the request was issued and the reply arrived – fall
72+
* back to looking up the correct inode in the inode cache.
73+
*
74+
* Returns @parent itself (no extra reference is taken) when it matches or when
75+
* there is nothing to compare against, and NULL when @parent is NULL. Otherwise
76+
* returns the result of ceph_get_inode(); the caller drops that reference when done.
77+
*/
78+
static struct inode *ceph_get_reply_dir(struct super_block *sb,
79+
struct inode *parent,
80+
struct ceph_mds_reply_info_parsed *rinfo)
81+
{
82+
struct ceph_vino vino;
83+
84+
if (unlikely(!rinfo->diri.in))
85+
return parent; /* nothing to compare against */
86+
87+
/* If we didn't have a cached parent inode to begin with, just bail out. */
88+
if (!parent)
89+
return NULL;
90+
91+
vino.ino = le64_to_cpu(rinfo->diri.in->ino);
92+
vino.snap = le64_to_cpu(rinfo->diri.in->snapid);
93+
94+
if (likely(ceph_vino_matches_parent(parent, vino)))
95+
return parent; /* matches – use the original reference */
96+
97+
/* Mismatch – this should be rare. Emit a WARN and obtain the correct inode. */
98+
WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
99+
ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);
100+
101+
return ceph_get_inode(sb, vino, NULL);
102+
}
103+
58104
/**
59105
* ceph_new_inode - allocate a new inode in advance of an expected create
60106
* @dir: parent directory for new inode
@@ -1523,6 +1569,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15231569
struct ceph_vino tvino, dvino;
15241570
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
15251571
struct ceph_client *cl = fsc->client;
1572+
struct inode *parent_dir = NULL;
15261573
int err = 0;
15271574

15281575
doutc(cl, "%p is_dentry %d is_target %d\n", req,
@@ -1536,10 +1583,17 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15361583
}
15371584

15381585
if (rinfo->head->is_dentry) {
1539-
struct inode *dir = req->r_parent;
1540-
1541-
if (dir) {
1542-
err = ceph_fill_inode(dir, NULL, &rinfo->diri,
1586+
/*
1587+
* r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
1588+
* so we need to get the correct inode
1589+
*/
1590+
parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
1591+
if (unlikely(IS_ERR(parent_dir))) {
1592+
err = PTR_ERR(parent_dir);
1593+
goto done;
1594+
}
1595+
if (parent_dir) {
1596+
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
15431597
rinfo->dirfrag, session, -1,
15441598
&req->r_caps_reservation);
15451599
if (err < 0)
@@ -1548,14 +1602,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15481602
WARN_ON_ONCE(1);
15491603
}
15501604

1551-
if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
1605+
if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
15521606
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
15531607
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
15541608
bool is_nokey = false;
15551609
struct qstr dname;
15561610
struct dentry *dn, *parent;
15571611
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
1558-
struct ceph_fname fname = { .dir = dir,
1612+
struct ceph_fname fname = { .dir = parent_dir,
15591613
.name = rinfo->dname,
15601614
.ctext = rinfo->altname,
15611615
.name_len = rinfo->dname_len,
@@ -1564,10 +1618,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15641618
BUG_ON(!rinfo->head->is_target);
15651619
BUG_ON(req->r_dentry);
15661620

1567-
parent = d_find_any_alias(dir);
1621+
parent = d_find_any_alias(parent_dir);
15681622
BUG_ON(!parent);
15691623

1570-
err = ceph_fname_alloc_buffer(dir, &oname);
1624+
err = ceph_fname_alloc_buffer(parent_dir, &oname);
15711625
if (err < 0) {
15721626
dput(parent);
15731627
goto done;
@@ -1576,7 +1630,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15761630
err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
15771631
if (err < 0) {
15781632
dput(parent);
1579-
ceph_fname_free_buffer(dir, &oname);
1633+
ceph_fname_free_buffer(parent_dir, &oname);
15801634
goto done;
15811635
}
15821636
dname.name = oname.name;
@@ -1595,7 +1649,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15951649
dname.len, dname.name, dn);
15961650
if (!dn) {
15971651
dput(parent);
1598-
ceph_fname_free_buffer(dir, &oname);
1652+
ceph_fname_free_buffer(parent_dir, &oname);
15991653
err = -ENOMEM;
16001654
goto done;
16011655
}
@@ -1610,12 +1664,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
16101664
ceph_snap(d_inode(dn)) != tvino.snap)) {
16111665
doutc(cl, " dn %p points to wrong inode %p\n",
16121666
dn, d_inode(dn));
1613-
ceph_dir_clear_ordered(dir);
1667+
ceph_dir_clear_ordered(parent_dir);
16141668
d_delete(dn);
16151669
dput(dn);
16161670
goto retry_lookup;
16171671
}
1618-
ceph_fname_free_buffer(dir, &oname);
1672+
ceph_fname_free_buffer(parent_dir, &oname);
16191673

16201674
req->r_dentry = dn;
16211675
dput(parent);
@@ -1794,6 +1848,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
17941848
&dvino, ptvino);
17951849
}
17961850
done:
1851+
/* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
1852+
if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
1853+
iput(parent_dir);
17971854
doutc(cl, "done err=%d\n", err);
17981855
return err;
17991856
}
@@ -2487,22 +2544,21 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
24872544
int truncate_retry = 20; /* The RMW will take around 50ms */
24882545
struct dentry *dentry;
24892546
char *path;
2490-
int pathlen;
2491-
u64 pathbase;
24922547
bool do_sync = false;
24932548

24942549
dentry = d_find_alias(inode);
24952550
if (!dentry) {
24962551
do_sync = true;
24972552
} else {
2498-
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
2553+
struct ceph_path_info path_info;
2554+
path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
24992555
if (IS_ERR(path)) {
25002556
do_sync = true;
25012557
err = 0;
25022558
} else {
25032559
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
25042560
}
2505-
ceph_mdsc_free_path(path, pathlen);
2561+
ceph_mdsc_free_path_info(&path_info);
25062562
dput(dentry);
25072563

25082564
/* For none EACCES cases will let the MDS do the mds auth check */

0 commit comments

Comments
 (0)