Skip to content

Commit 94cc087

Browse files
jtlayton authored and idryomov committed
ceph: add new "nopagecache" option
CephFS is a bit unlike most other filesystems in that it only conditionally does buffered I/O based on the caps that it gets from the MDS. In most cases, unless there is contended access for an inode, the MDS does give Fbc caps to the client, so the unbuffered codepaths are only infrequently traveled and are difficult to test. At one time, the "-o sync" mount option would give you this behavior, but that was removed in commit 7ab9b38 ("ceph: Don't use ceph-sync-mode for synchronous-fs."). Add a new mount option to tell the client to ignore Fbc caps when doing I/O, and to use the synchronous codepaths exclusively, even on non-O_DIRECT file descriptors. We already have an ioctl that forces this behavior on a per-file basis, so we can just always set the CEPH_F_SYNC flag in the file description on such mounts. Additionally, this patch also changes the client to not request Fbc when doing direct I/O. We aren't using the cache with O_DIRECT so we don't have any need for those caps. Signed-off-by: Jeff Layton <[email protected]> Acked-by: Greg Farnum <[email protected]> Reviewed-by: Venky Shankar <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 0078ea3 commit 94cc087

File tree

3 files changed

+27
-9
lines changed

3 files changed

+27
-9
lines changed

fs/ceph/file.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
204204
int fmode, bool isdir)
205205
{
206206
struct ceph_inode_info *ci = ceph_inode(inode);
207+
struct ceph_mount_options *opt =
208+
ceph_inode_to_client(&ci->vfs_inode)->mount_options;
207209
struct ceph_file_info *fi;
208210

209211
dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
225227
if (!fi)
226228
return -ENOMEM;
227229

230+
if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
231+
fi->flags |= CEPH_F_SYNC;
232+
228233
file->private_data = fi;
229234
}
230235

@@ -1536,7 +1541,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
15361541
struct ceph_inode_info *ci = ceph_inode(inode);
15371542
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
15381543
ssize_t ret;
1539-
int want, got = 0;
1544+
int want = 0, got = 0;
15401545
int retry_op = 0, read = 0;
15411546

15421547
again:
@@ -1551,13 +1556,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
15511556
else
15521557
ceph_start_io_read(inode);
15531558

1559+
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
1560+
want |= CEPH_CAP_FILE_CACHE;
15541561
if (fi->fmode & CEPH_FILE_MODE_LAZY)
1555-
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
1556-
else
1557-
want = CEPH_CAP_FILE_CACHE;
1562+
want |= CEPH_CAP_FILE_LAZYIO;
1563+
15581564
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
15591565
if (ret < 0) {
1560-
if (iocb->ki_flags & IOCB_DIRECT)
1566+
if (direct_lock)
15611567
ceph_end_io_direct(inode);
15621568
else
15631569
ceph_end_io_read(inode);
@@ -1691,7 +1697,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
16911697
struct ceph_osd_client *osdc = &fsc->client->osdc;
16921698
struct ceph_cap_flush *prealloc_cf;
16931699
ssize_t count, written = 0;
1694-
int err, want, got;
1700+
int err, want = 0, got;
16951701
bool direct_lock = false;
16961702
u32 map_flags;
16971703
u64 pool_flags;
@@ -1766,10 +1772,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
17661772

17671773
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
17681774
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
1775+
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
1776+
want |= CEPH_CAP_FILE_BUFFER;
17691777
if (fi->fmode & CEPH_FILE_MODE_LAZY)
1770-
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
1771-
else
1772-
want = CEPH_CAP_FILE_BUFFER;
1778+
want |= CEPH_CAP_FILE_LAZYIO;
17731779
got = 0;
17741780
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
17751781
if (err < 0)

fs/ceph/super.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ enum {
160160
Opt_quotadf,
161161
Opt_copyfrom,
162162
Opt_wsync,
163+
Opt_pagecache,
163164
};
164165

165166
enum ceph_recover_session_mode {
@@ -201,6 +202,7 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
201202
fsparam_string ("mon_addr", Opt_mon_addr),
202203
fsparam_u32 ("wsize", Opt_wsize),
203204
fsparam_flag_no ("wsync", Opt_wsync),
205+
fsparam_flag_no ("pagecache", Opt_pagecache),
204206
{}
205207
};
206208

@@ -564,6 +566,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
564566
else
565567
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
566568
break;
569+
case Opt_pagecache:
570+
if (result.negated)
571+
fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
572+
else
573+
fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
574+
break;
567575
default:
568576
BUG();
569577
}
@@ -699,6 +707,9 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
699707
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
700708
seq_puts(m, ",wsync");
701709

710+
if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
711+
seq_puts(m, ",nopagecache");
712+
702713
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
703714
seq_printf(m, ",wsize=%u", fsopt->wsize);
704715
if (fsopt->rsize != CEPH_MAX_READ_SIZE)

fs/ceph/super.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
4747
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
4848
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
49+
#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */
4950

5051
#define CEPH_MOUNT_OPT_DEFAULT \
5152
(CEPH_MOUNT_OPT_DCACHE | \

0 commit comments

Comments
 (0)