Skip to content

Commit 3290bad

Browse files
committed
Merge tag 'ceph-for-6.5-rc1' of https://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
 "A bunch of CephFS fixups from Xiubo, mostly around dropping caps, along with a fix for a regression in the readahead handling code which sneaked in with the switch to netfs helpers"

* tag 'ceph-for-6.5-rc1' of https://github.com/ceph/ceph-client:
  - ceph: don't let check_caps skip sending responses for revoke msgs
  - ceph: issue a cap release immediately if no cap exists
  - ceph: trigger to flush the buffer when making snapshot
  - ceph: fix blindly expanding the readahead windows
  - ceph: add a dedicated private data for netfs rreq
  - ceph: voluntarily drop Xx caps for requests those touch parent mtime
  - ceph: try to dump the msgs when decoding fails
  - ceph: only send metrics when the MDS rank is ready
2 parents 36b93ae + 257e617 commit 3290bad

File tree

8 files changed

+154
-40
lines changed

8 files changed

+154
-40
lines changed

fs/ceph/addr.c

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
187187
struct inode *inode = rreq->inode;
188188
struct ceph_inode_info *ci = ceph_inode(inode);
189189
struct ceph_file_layout *lo = &ci->i_layout;
190+
unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
191+
loff_t end = rreq->start + rreq->len, new_end;
192+
struct ceph_netfs_request_data *priv = rreq->netfs_priv;
193+
unsigned long max_len;
190194
u32 blockoff;
191-
u64 blockno;
192195

193-
/* Expand the start downward */
194-
blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
195-
rreq->start = blockno * lo->stripe_unit;
196-
rreq->len += blockoff;
196+
if (priv) {
197+
/* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
198+
if (priv->file_ra_disabled)
199+
max_pages = 0;
200+
else
201+
max_pages = priv->file_ra_pages;
202+
203+
}
197204

198-
/* Now, round up the length to the next block */
199-
rreq->len = roundup(rreq->len, lo->stripe_unit);
205+
/* Readahead is disabled */
206+
if (!max_pages)
207+
return;
208+
209+
max_len = max_pages << PAGE_SHIFT;
210+
211+
/*
212+
* Try to expand the length forward by rounding up it to the next
213+
* block, but do not exceed the file size, unless the original
214+
* request already exceeds it.
215+
*/
216+
new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
217+
if (new_end > end && new_end <= rreq->start + max_len)
218+
rreq->len = new_end - rreq->start;
219+
220+
/* Try to expand the start downward */
221+
div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
222+
if (rreq->len + blockoff <= max_len) {
223+
rreq->start -= blockoff;
224+
rreq->len += blockoff;
225+
}
200226
}
201227

202228
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
@@ -362,18 +388,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
362388
{
363389
struct inode *inode = rreq->inode;
364390
int got = 0, want = CEPH_CAP_FILE_CACHE;
391+
struct ceph_netfs_request_data *priv;
365392
int ret = 0;
366393

367394
if (rreq->origin != NETFS_READAHEAD)
368395
return 0;
369396

397+
priv = kzalloc(sizeof(*priv), GFP_NOFS);
398+
if (!priv)
399+
return -ENOMEM;
400+
370401
if (file) {
371402
struct ceph_rw_context *rw_ctx;
372403
struct ceph_file_info *fi = file->private_data;
373404

405+
priv->file_ra_pages = file->f_ra.ra_pages;
406+
priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;
407+
374408
rw_ctx = ceph_find_rw_context(fi);
375-
if (rw_ctx)
409+
if (rw_ctx) {
410+
rreq->netfs_priv = priv;
376411
return 0;
412+
}
377413
}
378414

379415
/*
@@ -383,27 +419,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
383419
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
384420
if (ret < 0) {
385421
dout("start_read %p, error getting cap\n", inode);
386-
return ret;
422+
goto out;
387423
}
388424

389425
if (!(got & want)) {
390426
dout("start_read %p, no cache cap\n", inode);
391-
return -EACCES;
427+
ret = -EACCES;
428+
goto out;
429+
}
430+
if (ret == 0) {
431+
ret = -EACCES;
432+
goto out;
392433
}
393-
if (ret == 0)
394-
return -EACCES;
395434

396-
rreq->netfs_priv = (void *)(uintptr_t)got;
397-
return 0;
435+
priv->caps = got;
436+
rreq->netfs_priv = priv;
437+
438+
out:
439+
if (ret < 0)
440+
kfree(priv);
441+
442+
return ret;
398443
}
399444

400445
static void ceph_netfs_free_request(struct netfs_io_request *rreq)
401446
{
402-
struct ceph_inode_info *ci = ceph_inode(rreq->inode);
403-
int got = (uintptr_t)rreq->netfs_priv;
447+
struct ceph_netfs_request_data *priv = rreq->netfs_priv;
448+
449+
if (!priv)
450+
return;
404451

405-
if (got)
406-
ceph_put_cap_refs(ci, got);
452+
if (priv->caps)
453+
ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
454+
kfree(priv);
455+
rreq->netfs_priv = NULL;
407456
}
408457

409458
const struct netfs_request_ops ceph_netfs_ops = {

fs/ceph/caps.c

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
31093109
}
31103110
if (had & CEPH_CAP_FILE_WR) {
31113111
if (--ci->i_wr_ref == 0) {
3112+
/*
3113+
* The Fb caps will always be took and released
3114+
* together with the Fw caps.
3115+
*/
3116+
WARN_ON_ONCE(ci->i_wb_ref);
3117+
31123118
last++;
31133119
check_flushsnaps = true;
31143120
if (ci->i_wrbuffer_ref_head == 0 &&
@@ -3560,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode,
35603566
}
35613567
BUG_ON(cap->issued & ~cap->implemented);
35623568

3569+
/* don't let check_caps skip sending a response to MDS for revoke msgs */
3570+
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
3571+
cap->mds_wanted = 0;
3572+
if (cap == ci->i_auth_cap)
3573+
check_caps = 1; /* check auth cap only */
3574+
else
3575+
check_caps = 2; /* check all caps */
3576+
}
3577+
35633578
if (extra_info->inline_version > 0 &&
35643579
extra_info->inline_version >= ci->i_inline_version) {
35653580
ci->i_inline_version = extra_info->inline_version;
@@ -4086,6 +4101,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
40864101
struct cap_extra_info extra_info = {};
40874102
bool queue_trunc;
40884103
bool close_sessions = false;
4104+
bool do_cap_release = false;
40894105

40904106
dout("handle_caps from mds%d\n", session->s_mds);
40914107

@@ -4192,17 +4208,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
41924208
if (!inode) {
41934209
dout(" i don't have ino %llx\n", vino.ino);
41944210

4195-
if (op == CEPH_CAP_OP_IMPORT) {
4196-
cap = ceph_get_cap(mdsc, NULL);
4197-
cap->cap_ino = vino.ino;
4198-
cap->queue_release = 1;
4199-
cap->cap_id = le64_to_cpu(h->cap_id);
4200-
cap->mseq = mseq;
4201-
cap->seq = seq;
4202-
cap->issue_seq = seq;
4203-
spin_lock(&session->s_cap_lock);
4204-
__ceph_queue_cap_release(session, cap);
4205-
spin_unlock(&session->s_cap_lock);
4211+
switch (op) {
4212+
case CEPH_CAP_OP_IMPORT:
4213+
case CEPH_CAP_OP_REVOKE:
4214+
case CEPH_CAP_OP_GRANT:
4215+
do_cap_release = true;
4216+
break;
4217+
default:
4218+
break;
42064219
}
42074220
goto flush_cap_releases;
42084221
}
@@ -4252,6 +4265,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
42524265
inode, ceph_ino(inode), ceph_snap(inode),
42534266
session->s_mds);
42544267
spin_unlock(&ci->i_ceph_lock);
4268+
switch (op) {
4269+
case CEPH_CAP_OP_REVOKE:
4270+
case CEPH_CAP_OP_GRANT:
4271+
do_cap_release = true;
4272+
break;
4273+
default:
4274+
break;
4275+
}
42554276
goto flush_cap_releases;
42564277
}
42574278

@@ -4302,6 +4323,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
43024323
* along for the mds (who clearly thinks we still have this
43034324
* cap).
43044325
*/
4326+
if (do_cap_release) {
4327+
cap = ceph_get_cap(mdsc, NULL);
4328+
cap->cap_ino = vino.ino;
4329+
cap->queue_release = 1;
4330+
cap->cap_id = le64_to_cpu(h->cap_id);
4331+
cap->mseq = mseq;
4332+
cap->seq = seq;
4333+
cap->issue_seq = seq;
4334+
spin_lock(&session->s_cap_lock);
4335+
__ceph_queue_cap_release(session, cap);
4336+
spin_unlock(&session->s_cap_lock);
4337+
}
43054338
ceph_flush_cap_releases(mdsc, session);
43064339
goto done;
43074340

fs/ceph/dir.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
886886
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
887887
req->r_args.mknod.mode = cpu_to_le32(mode);
888888
req->r_args.mknod.rdev = cpu_to_le32(rdev);
889-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
889+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
890+
CEPH_CAP_XATTR_EXCL;
890891
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
891892
if (as_ctx.pagelist) {
892893
req->r_pagelist = as_ctx.pagelist;
@@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
953954
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
954955
req->r_dentry = dget(dentry);
955956
req->r_num_caps = 2;
956-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
957+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
958+
CEPH_CAP_XATTR_EXCL;
957959
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
958960
if (as_ctx.pagelist) {
959961
req->r_pagelist = as_ctx.pagelist;
@@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
10221024
ihold(dir);
10231025
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
10241026
req->r_args.mkdir.mode = cpu_to_le32(mode);
1025-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
1027+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
1028+
CEPH_CAP_XATTR_EXCL;
10261029
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
10271030
if (as_ctx.pagelist) {
10281031
req->r_pagelist = as_ctx.pagelist;
@@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
10791082
req->r_parent = dir;
10801083
ihold(dir);
10811084
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
1082-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
1085+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
10831086
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
10841087
/* release LINK_SHARED on source inode (mds will lock it) */
10851088
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
@@ -1218,7 +1221,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
12181221
req->r_num_caps = 2;
12191222
req->r_parent = dir;
12201223
ihold(dir);
1221-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
1224+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
12221225
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
12231226
req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
12241227

@@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
13201323
req->r_parent = new_dir;
13211324
ihold(new_dir);
13221325
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
1323-
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
1326+
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
13241327
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
1325-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
1328+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
13261329
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
13271330
/* release LINK_RDCACHE on source inode (mds will lock it) */
13281331
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

fs/ceph/file.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
791791
if (flags & O_CREAT) {
792792
struct ceph_file_layout lo;
793793

794-
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
794+
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
795+
CEPH_CAP_XATTR_EXCL;
795796
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
796797
if (as_ctx.pagelist) {
797798
req->r_pagelist = as_ctx.pagelist;

fs/ceph/mds_client.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
645645
err = -EIO;
646646
out_bad:
647647
pr_err("mds parse_reply err %d\n", err);
648+
ceph_msg_dump(msg);
648649
return err;
649650
}
650651

@@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
35383539

35393540
bad:
35403541
pr_err("mdsc_handle_forward decode error err=%d\n", err);
3542+
ceph_msg_dump(msg);
35413543
}
35423544

35433545
static int __decode_session_metadata(void **p, void *end,
@@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
52585260
bad:
52595261
pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
52605262
ceph_umount_begin(mdsc->fsc->sb);
5263+
ceph_msg_dump(msg);
52615264
err_out:
52625265
mutex_lock(&mdsc->mutex);
52635266
mdsc->mdsmap_err = err;
@@ -5326,6 +5329,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
53265329
bad:
53275330
pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
53285331
ceph_umount_begin(mdsc->fsc->sb);
5332+
ceph_msg_dump(msg);
53295333
return;
53305334
}
53315335

fs/ceph/metric.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
3636
s32 items = 0;
3737
s32 len;
3838

39+
/* Do not send the metrics until the MDS rank is ready */
40+
mutex_lock(&mdsc->mutex);
41+
if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) {
42+
mutex_unlock(&mdsc->mutex);
43+
return false;
44+
}
45+
mutex_unlock(&mdsc->mutex);
46+
3947
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
4048
+ sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
4149
+ sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize)

fs/ceph/snap.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -675,14 +675,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
675675
return 0;
676676
}
677677

678-
/* Fb cap still in use, delay it */
679-
if (ci->i_wb_ref) {
678+
/*
679+
* Defer flushing the capsnap if the dirty buffer not flushed yet.
680+
* And trigger to flush the buffer immediately.
681+
*/
682+
if (ci->i_wrbuffer_ref) {
680683
dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu "
681684
"used WRBUFFER, delaying\n", __func__, inode,
682685
ceph_vinop(inode), capsnap, capsnap->context,
683686
capsnap->context->seq, ceph_cap_string(capsnap->dirty),
684687
capsnap->size);
685-
capsnap->writing = 1;
688+
ceph_queue_writeback(inode);
686689
return 0;
687690
}
688691

fs/ceph/super.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,19 @@ struct ceph_inode_info {
451451
unsigned long i_work_mask;
452452
};
453453

454+
struct ceph_netfs_request_data {
455+
int caps;
456+
457+
/*
458+
* Maximum size of a file readahead request.
459+
* The fadvise could update the bdi's default ra_pages.
460+
*/
461+
unsigned int file_ra_pages;
462+
463+
/* Set it if fadvise disables file readahead entirely */
464+
bool file_ra_disabled;
465+
};
466+
454467
static inline struct ceph_inode_info *
455468
ceph_inode(const struct inode *inode)
456469
{

0 commit comments

Comments
 (0)