Skip to content

Commit 719a251

Browse files
ukernel authored and idryomov committed
ceph: consider inode's last read/write when calculating wanted caps
Add i_last_rd and i_last_wr to ceph_inode_info. These fields are used to track the last time the client acquired read/write caps for the inode. If there is no read/write on an inode for 'caps_wanted_delay_max' seconds, __ceph_caps_file_wanted() does not request caps for read/write even if there are open files. Call __ceph_touch_fmode() for dir operations. __ceph_caps_file_wanted() calculates a dir's wanted caps according to the last dir read/modification. If there was a recent dir read, the dir inode wants CEPH_CAP_ANY_SHARED caps. If there was a recent dir modification, it also wants CEPH_CAP_FILE_EXCL. Readdir is a special case: the dir inode wants CEPH_CAP_FILE_EXCL after readdir because, with that cap, modifications do not need to release CEPH_CAP_FILE_SHARED or invalidate all dentry leases issued by readdir. Signed-off-by: "Yan, Zheng" <[email protected]> Reviewed-by: Jeff Layton <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent c0e385b commit 719a251

File tree

8 files changed

+188
-74
lines changed

8 files changed

+188
-74
lines changed

fs/ceph/caps.c

Lines changed: 129 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -978,19 +978,67 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
978978
return used;
979979
}
980980

981+
/*
 * Reference count bias that ceph_get_caps() adds to i_nr_by_mode[] (via
 * ceph_get_fmode()) while it waits for caps and drops again afterwards.
 * For non-directory inodes, __ceph_caps_file_wanted() treats a per-mode
 * count >= this bias as "wanted" regardless of the last read/write time.
 */
#define FMODE_WAIT_BIAS 1000
982+
981983
/*
982984
* wanted, by virtue of open file modes
983985
*/
984986
int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
985987
{
986-
int i, bits = 0;
987-
for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
988-
if (ci->i_nr_by_mode[i])
989-
bits |= 1 << i;
988+
/*
 * Slot of each file mode in i_nr_by_mode[].  ceph_get_fmode() counts a
 * mode bit at 0-based position p in slot p + 1 (it uses (fmode << 1) | 1),
 * which is what ffs() returns for that bit.
 * NOTE(review): PIN_SHIFT is presumably 0, i.e. CEPH_FILE_MODE_PIN == 0
 * and ffs(0) == 0 -- confirm against the header.
 */
const int PIN_SHIFT = ffs(CEPH_FILE_MODE_PIN);
989+
const int RD_SHIFT = ffs(CEPH_FILE_MODE_RD);
990+
const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR);
991+
const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY);
992+
struct ceph_mount_options *opt =
993+
ceph_inode_to_client(&ci->vfs_inode)->mount_options;
994+
/*
 * Cutoffs in jiffies: a last read/write stamp counts as "recent" only
 * if it is after the cutoff.  used_cutoff is derived from
 * caps_wanted_delay_max, idle_cutoff from caps_wanted_delay_min.
 */
unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ;
995+
unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ;
996+
997+
/* directories: build the wanted cap mask directly */
if (S_ISDIR(ci->vfs_inode.i_mode)) {
998+
int want = 0;
999+
1000+
/* use used_cutoff here, to keep dir's wanted caps longer */
1001+
if (ci->i_nr_by_mode[RD_SHIFT] > 0 ||
1002+
time_after(ci->i_last_rd, used_cutoff))
1003+
want |= CEPH_CAP_ANY_SHARED;
1004+
1005+
if (ci->i_nr_by_mode[WR_SHIFT] > 0 ||
1006+
time_after(ci->i_last_wr, used_cutoff)) {
1007+
want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
1008+
if (opt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
1009+
want |= CEPH_CAP_ANY_DIR_OPS;
1010+
}
1011+
1012+
/* CEPH_CAP_PIN is wanted whenever anything else is, or the PIN slot is held */
if (want || ci->i_nr_by_mode[PIN_SHIFT] > 0)
1013+
want |= CEPH_CAP_PIN;
1014+
1015+
return want;
1016+
} else {
1017+
/* non-directories: collect wanted mode slots, then map them to caps */
int bits = 0;
1018+
1019+
/*
 * Read mode.  While the RD slot is held, it is wanted only if a
 * waiter holds the FMODE_WAIT_BIAS reference or the last read is
 * after used_cutoff.  With the slot not held, it is wanted only if
 * the last read is after idle_cutoff.
 */
if (ci->i_nr_by_mode[RD_SHIFT] > 0) {
1020+
if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS ||
1021+
time_after(ci->i_last_rd, used_cutoff))
1022+
bits |= 1 << RD_SHIFT;
1023+
} else if (time_after(ci->i_last_rd, idle_cutoff)) {
1024+
bits |= 1 << RD_SHIFT;
1025+
}
1026+
1027+
/* write mode: same rule as for read, using the WR slot and i_last_wr */
if (ci->i_nr_by_mode[WR_SHIFT] > 0) {
1028+
if (ci->i_nr_by_mode[WR_SHIFT] >= FMODE_WAIT_BIAS ||
1029+
time_after(ci->i_last_wr, used_cutoff))
1030+
bits |= 1 << WR_SHIFT;
1031+
} else if (time_after(ci->i_last_wr, idle_cutoff)) {
1032+
bits |= 1 << WR_SHIFT;
1033+
}
1034+
1035+
/* check lazyio only when read/write is wanted */
1036+
if ((bits & (CEPH_FILE_MODE_RDWR << 1)) &&
1037+
ci->i_nr_by_mode[LAZY_SHIFT] > 0)
1038+
bits |= 1 << LAZY_SHIFT;
1039+
1040+
/* bits is in slot numbering; shift right by one to get file mode bits */
return bits ? ceph_caps_for_mode(bits >> 1) : 0;
9901041
}
991-
if (bits == 0)
992-
return 0;
993-
return ceph_caps_for_mode(bits >> 1);
9941042
}
9951043

9961044
/*
@@ -1032,14 +1080,6 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
10321080
return mds_wanted;
10331081
}
10341082

1035-
/*
1036-
* called under i_ceph_lock
1037-
*/
1038-
static int __ceph_is_single_caps(struct ceph_inode_info *ci)
1039-
{
1040-
return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
1041-
}
1042-
10431083
int ceph_is_any_caps(struct inode *inode)
10441084
{
10451085
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1877,10 +1917,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
18771917
if (ci->i_ceph_flags & CEPH_I_FLUSH)
18781918
flags |= CHECK_CAPS_FLUSH;
18791919

1880-
if (!(flags & CHECK_CAPS_AUTHONLY) ||
1881-
(ci->i_auth_cap && __ceph_is_single_caps(ci)))
1882-
__cap_delay_cancel(mdsc, ci);
1883-
18841920
goto retry_locked;
18851921
retry:
18861922
spin_lock(&ci->i_ceph_lock);
@@ -1907,9 +1943,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
19071943
if (IS_RDONLY(inode)) {
19081944
want = CEPH_CAP_ANY_SHARED;
19091945
} else {
1910-
want = CEPH_CAP_ANY_SHARED |
1911-
CEPH_CAP_FILE_EXCL |
1912-
CEPH_CAP_ANY_DIR_OPS;
1946+
want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
19131947
}
19141948
retain |= want;
19151949
} else {
@@ -2105,9 +2139,17 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
21052139
goto retry; /* retake i_ceph_lock and restart our cap scan. */
21062140
}
21072141

2108-
/* Reschedule delayed caps release if we delayed anything */
2109-
if (delayed)
2110-
__cap_delay_requeue(mdsc, ci, false);
2142+
if (list_empty(&ci->i_cap_delay_list)) {
2143+
if (delayed) {
2144+
/* Reschedule delayed caps release if we delayed anything */
2145+
__cap_delay_requeue(mdsc, ci, false);
2146+
} else if (__ceph_is_any_real_caps(ci) &&
2147+
(file_wanted & ~CEPH_CAP_PIN) &&
2148+
!(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
2149+
/* periodically re-calculate caps wanted by open files */
2150+
__cap_delay_requeue(mdsc, ci, true);
2151+
}
2152+
}
21112153

21122154
spin_unlock(&ci->i_ceph_lock);
21132155

@@ -2573,8 +2615,9 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
25732615
* FIXME: how does a 0 return differ from -EAGAIN?
25742616
*/
25752617
/*
 * Flag bits passed to try_get_cap_refs().  Callers OR them into the file
 * mode bits returned by get_used_fmode(), so they must not overlap the
 * CEPH_FILE_MODE_* range.
 */
enum {
2576-
NON_BLOCKING = 1,
2577-
CHECK_FILELOCK = 2,
2618+
/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
2619+
NON_BLOCKING = (1 << 8),
2620+
CHECK_FILELOCK = (1 << 9),
25782621
};
25792622

25802623
static int try_get_cap_refs(struct inode *inode, int need, int want,
@@ -2584,7 +2627,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
25842627
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
25852628
int ret = 0;
25862629
int have, implemented;
2587-
int file_wanted;
25882630
bool snap_rwsem_locked = false;
25892631

25902632
dout("get_cap_refs %p need %s want %s\n", inode,
@@ -2600,15 +2642,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
26002642
goto out_unlock;
26012643
}
26022644

2603-
/* make sure file is actually open */
2604-
file_wanted = __ceph_caps_file_wanted(ci);
2605-
if ((file_wanted & need) != need) {
2606-
dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
2607-
ceph_cap_string(need), ceph_cap_string(file_wanted));
2608-
ret = -EBADF;
2609-
goto out_unlock;
2610-
}
2611-
26122645
/* finish pending truncate */
26132646
while (ci->i_truncate_pending) {
26142647
spin_unlock(&ci->i_ceph_lock);
@@ -2719,6 +2752,9 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
27192752
ceph_cap_string(have), ceph_cap_string(need));
27202753
}
27212754
out_unlock:
2755+
2756+
__ceph_touch_fmode(ci, mdsc, flags);
2757+
27222758
spin_unlock(&ci->i_ceph_lock);
27232759
if (snap_rwsem_locked)
27242760
up_read(&mdsc->snap_rwsem);
@@ -2756,10 +2792,20 @@ static void check_max_size(struct inode *inode, loff_t endoff)
27562792
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
27572793
}
27582794

2795+
static inline int get_used_fmode(int caps)
2796+
{
2797+
int fmode = 0;
2798+
if (caps & CEPH_CAP_FILE_RD)
2799+
fmode |= CEPH_FILE_MODE_RD;
2800+
if (caps & CEPH_CAP_FILE_WR)
2801+
fmode |= CEPH_FILE_MODE_WR;
2802+
return fmode;
2803+
}
2804+
27592805
int ceph_try_get_caps(struct inode *inode, int need, int want,
27602806
bool nonblock, int *got)
27612807
{
2762-
int ret;
2808+
int ret, flags;
27632809

27642810
BUG_ON(need & ~CEPH_CAP_FILE_RD);
27652811
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
@@ -2771,8 +2817,11 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
27712817
return ret;
27722818
}
27732819

2774-
ret = try_get_cap_refs(inode, need, want, 0,
2775-
(nonblock ? NON_BLOCKING : 0), got);
2820+
flags = get_used_fmode(need | want);
2821+
if (nonblock)
2822+
flags |= NON_BLOCKING;
2823+
2824+
ret = try_get_cap_refs(inode, need, want, 0, flags, got);
27762825
return ret == -EAGAIN ? 0 : ret;
27772826
}
27782827

@@ -2798,11 +2847,15 @@ int ceph_get_caps(struct file *filp, int need, int want,
27982847
fi->filp_gen != READ_ONCE(fsc->filp_gen))
27992848
return -EBADF;
28002849

2850+
flags = get_used_fmode(need | want);
2851+
28012852
while (true) {
28022853
if (endoff > 0)
28032854
check_max_size(inode, endoff);
28042855

2805-
flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
2856+
flags &= CEPH_FILE_MODE_MASK;
2857+
if (atomic_read(&fi->num_locks))
2858+
flags |= CHECK_FILELOCK;
28062859
_got = 0;
28072860
ret = try_get_cap_refs(inode, need, want, endoff,
28082861
flags, &_got);
@@ -2822,6 +2875,8 @@ int ceph_get_caps(struct file *filp, int need, int want,
28222875
list_add(&cw.list, &mdsc->cap_wait_list);
28232876
spin_unlock(&mdsc->caps_list_lock);
28242877

2878+
/* make sure used fmode not timeout */
2879+
ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
28252880
add_wait_queue(&ci->i_cap_wq, &wait);
28262881

28272882
flags |= NON_BLOCKING;
@@ -2835,6 +2890,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
28352890
}
28362891

28372892
remove_wait_queue(&ci->i_cap_wq, &wait);
2893+
ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
28382894

28392895
spin_lock(&mdsc->caps_list_lock);
28402896
list_del(&cw.list);
@@ -2854,7 +2910,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
28542910
if (ret < 0) {
28552911
if (ret == -ESTALE) {
28562912
/* session was killed, try renew caps */
2857-
ret = ceph_renew_caps(inode);
2913+
ret = ceph_renew_caps(inode, flags);
28582914
if (ret == 0)
28592915
continue;
28602916
}
@@ -4153,6 +4209,33 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
41534209
dout("flush_dirty_caps done\n");
41544210
}
41554211

4212+
void __ceph_touch_fmode(struct ceph_inode_info *ci,
4213+
struct ceph_mds_client *mdsc, int fmode)
4214+
{
4215+
unsigned long now = jiffies;
4216+
if (fmode & CEPH_FILE_MODE_RD)
4217+
ci->i_last_rd = now;
4218+
if (fmode & CEPH_FILE_MODE_WR)
4219+
ci->i_last_wr = now;
4220+
/* queue periodic check */
4221+
if (fmode &&
4222+
__ceph_is_any_real_caps(ci) &&
4223+
list_empty(&ci->i_cap_delay_list))
4224+
__cap_delay_requeue(mdsc, ci, true);
4225+
}
4226+
4227+
void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
4228+
{
4229+
int i;
4230+
int bits = (fmode << 1) | 1;
4231+
spin_lock(&ci->i_ceph_lock);
4232+
for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
4233+
if (bits & (1 << i))
4234+
ci->i_nr_by_mode[i] += count;
4235+
}
4236+
spin_unlock(&ci->i_ceph_lock);
4237+
}
4238+
41564239
void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
41574240
{
41584241
int i;
@@ -4168,26 +4251,18 @@ void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
41684251
* we may need to release capabilities to the MDS (or schedule
41694252
* their delayed release).
41704253
*/
4171-
void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
4254+
void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
41724255
{
4173-
int i, last = 0;
4256+
int i;
41744257
/* slot 0 is always included; a mode bit at position p maps to slot p + 1 */
int bits = (fmode << 1) | 1;
41754258
spin_lock(&ci->i_ceph_lock);
41764259
for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
41774260
if (bits & (1 << i)) {
4178-
BUG_ON(ci->i_nr_by_mode[i] == 0);
4179-
if (--ci->i_nr_by_mode[i] == 0)
4180-
last++;
4261+
/* dropping more references than are currently held is a bug */
BUG_ON(ci->i_nr_by_mode[i] < count);
4262+
ci->i_nr_by_mode[i] -= count;
41814263
}
41824264
}
4183-
dout("put_fmode %p fmode %d {%d,%d,%d,%d}\n",
4184-
&ci->vfs_inode, fmode,
4185-
ci->i_nr_by_mode[0], ci->i_nr_by_mode[1],
4186-
ci->i_nr_by_mode[2], ci->i_nr_by_mode[3]);
41874265
spin_unlock(&ci->i_ceph_lock);
4188-
4189-
if (last && ci->i_vino.snap == CEPH_NOSNAP)
4190-
ceph_check_caps(ci, 0, NULL);
41914266
}
41924267

41934268
/*

0 commit comments

Comments
 (0)