@@ -978,19 +978,67 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
978
978
return used ;
979
979
}
980
980
981
+ #define FMODE_WAIT_BIAS 1000
982
+
981
983
/*
982
984
* wanted, by virtue of open file modes
983
985
*/
984
986
int __ceph_caps_file_wanted (struct ceph_inode_info * ci )
985
987
{
986
- int i , bits = 0 ;
987
- for (i = 0 ; i < CEPH_FILE_MODE_BITS ; i ++ ) {
988
- if (ci -> i_nr_by_mode [i ])
989
- bits |= 1 << i ;
988
+ const int PIN_SHIFT = ffs (CEPH_FILE_MODE_PIN );
989
+ const int RD_SHIFT = ffs (CEPH_FILE_MODE_RD );
990
+ const int WR_SHIFT = ffs (CEPH_FILE_MODE_WR );
991
+ const int LAZY_SHIFT = ffs (CEPH_FILE_MODE_LAZY );
992
+ struct ceph_mount_options * opt =
993
+ ceph_inode_to_client (& ci -> vfs_inode )-> mount_options ;
994
+ unsigned long used_cutoff = jiffies - opt -> caps_wanted_delay_max * HZ ;
995
+ unsigned long idle_cutoff = jiffies - opt -> caps_wanted_delay_min * HZ ;
996
+
997
+ if (S_ISDIR (ci -> vfs_inode .i_mode )) {
998
+ int want = 0 ;
999
+
1000
+ /* use used_cutoff here, to keep dir's wanted caps longer */
1001
+ if (ci -> i_nr_by_mode [RD_SHIFT ] > 0 ||
1002
+ time_after (ci -> i_last_rd , used_cutoff ))
1003
+ want |= CEPH_CAP_ANY_SHARED ;
1004
+
1005
+ if (ci -> i_nr_by_mode [WR_SHIFT ] > 0 ||
1006
+ time_after (ci -> i_last_wr , used_cutoff )) {
1007
+ want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL ;
1008
+ if (opt -> flags & CEPH_MOUNT_OPT_ASYNC_DIROPS )
1009
+ want |= CEPH_CAP_ANY_DIR_OPS ;
1010
+ }
1011
+
1012
+ if (want || ci -> i_nr_by_mode [PIN_SHIFT ] > 0 )
1013
+ want |= CEPH_CAP_PIN ;
1014
+
1015
+ return want ;
1016
+ } else {
1017
+ int bits = 0 ;
1018
+
1019
+ if (ci -> i_nr_by_mode [RD_SHIFT ] > 0 ) {
1020
+ if (ci -> i_nr_by_mode [RD_SHIFT ] >= FMODE_WAIT_BIAS ||
1021
+ time_after (ci -> i_last_rd , used_cutoff ))
1022
+ bits |= 1 << RD_SHIFT ;
1023
+ } else if (time_after (ci -> i_last_rd , idle_cutoff )) {
1024
+ bits |= 1 << RD_SHIFT ;
1025
+ }
1026
+
1027
+ if (ci -> i_nr_by_mode [WR_SHIFT ] > 0 ) {
1028
+ if (ci -> i_nr_by_mode [WR_SHIFT ] >= FMODE_WAIT_BIAS ||
1029
+ time_after (ci -> i_last_wr , used_cutoff ))
1030
+ bits |= 1 << WR_SHIFT ;
1031
+ } else if (time_after (ci -> i_last_wr , idle_cutoff )) {
1032
+ bits |= 1 << WR_SHIFT ;
1033
+ }
1034
+
1035
+ /* check lazyio only when read/write is wanted */
1036
+ if ((bits & (CEPH_FILE_MODE_RDWR << 1 )) &&
1037
+ ci -> i_nr_by_mode [LAZY_SHIFT ] > 0 )
1038
+ bits |= 1 << LAZY_SHIFT ;
1039
+
1040
+ return bits ? ceph_caps_for_mode (bits >> 1 ) : 0 ;
990
1041
}
991
- if (bits == 0 )
992
- return 0 ;
993
- return ceph_caps_for_mode (bits >> 1 );
994
1042
}
995
1043
996
1044
/*
@@ -1032,14 +1080,6 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
1032
1080
return mds_wanted ;
1033
1081
}
1034
1082
1035
- /*
1036
- * called under i_ceph_lock
1037
- */
1038
- static int __ceph_is_single_caps (struct ceph_inode_info * ci )
1039
- {
1040
- return rb_first (& ci -> i_caps ) == rb_last (& ci -> i_caps );
1041
- }
1042
-
1043
1083
int ceph_is_any_caps (struct inode * inode )
1044
1084
{
1045
1085
struct ceph_inode_info * ci = ceph_inode (inode );
@@ -1877,10 +1917,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1877
1917
if (ci -> i_ceph_flags & CEPH_I_FLUSH )
1878
1918
flags |= CHECK_CAPS_FLUSH ;
1879
1919
1880
- if (!(flags & CHECK_CAPS_AUTHONLY ) ||
1881
- (ci -> i_auth_cap && __ceph_is_single_caps (ci )))
1882
- __cap_delay_cancel (mdsc , ci );
1883
-
1884
1920
goto retry_locked ;
1885
1921
retry :
1886
1922
spin_lock (& ci -> i_ceph_lock );
@@ -1907,9 +1943,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1907
1943
if (IS_RDONLY (inode )) {
1908
1944
want = CEPH_CAP_ANY_SHARED ;
1909
1945
} else {
1910
- want = CEPH_CAP_ANY_SHARED |
1911
- CEPH_CAP_FILE_EXCL |
1912
- CEPH_CAP_ANY_DIR_OPS ;
1946
+ want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL ;
1913
1947
}
1914
1948
retain |= want ;
1915
1949
} else {
@@ -2105,9 +2139,17 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
2105
2139
goto retry ; /* retake i_ceph_lock and restart our cap scan. */
2106
2140
}
2107
2141
2108
- /* Reschedule delayed caps release if we delayed anything */
2109
- if (delayed )
2110
- __cap_delay_requeue (mdsc , ci , false);
2142
+ if (list_empty (& ci -> i_cap_delay_list )) {
2143
+ if (delayed ) {
2144
+ /* Reschedule delayed caps release if we delayed anything */
2145
+ __cap_delay_requeue (mdsc , ci , false);
2146
+ } else if (__ceph_is_any_real_caps (ci ) &&
2147
+ (file_wanted & ~CEPH_CAP_PIN ) &&
2148
+ !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR ))) {
2149
+ /* periodically re-calculate caps wanted by open files */
2150
+ __cap_delay_requeue (mdsc , ci , true);
2151
+ }
2152
+ }
2111
2153
2112
2154
spin_unlock (& ci -> i_ceph_lock );
2113
2155
@@ -2573,8 +2615,9 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
2573
2615
* FIXME: how does a 0 return differ from -EAGAIN?
2574
2616
*/
2575
2617
enum {
	/*
	 * Flag bits for the flags argument of try_get_cap_refs().
	 * The first 8 bits are reserved for CEPH_FILE_MODE_FOO, so the
	 * same word can also carry the file mode in use.
	 */
	NON_BLOCKING	= 1 << 8,	/* set for non-blocking callers */
	CHECK_FILELOCK	= 1 << 9,	/* set when the file has locks */
};
2579
2622
2580
2623
static int try_get_cap_refs (struct inode * inode , int need , int want ,
@@ -2584,7 +2627,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
2584
2627
struct ceph_mds_client * mdsc = ceph_inode_to_client (inode )-> mdsc ;
2585
2628
int ret = 0 ;
2586
2629
int have , implemented ;
2587
- int file_wanted ;
2588
2630
bool snap_rwsem_locked = false;
2589
2631
2590
2632
dout ("get_cap_refs %p need %s want %s\n" , inode ,
@@ -2600,15 +2642,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
2600
2642
goto out_unlock ;
2601
2643
}
2602
2644
2603
- /* make sure file is actually open */
2604
- file_wanted = __ceph_caps_file_wanted (ci );
2605
- if ((file_wanted & need ) != need ) {
2606
- dout ("try_get_cap_refs need %s file_wanted %s, EBADF\n" ,
2607
- ceph_cap_string (need ), ceph_cap_string (file_wanted ));
2608
- ret = - EBADF ;
2609
- goto out_unlock ;
2610
- }
2611
-
2612
2645
/* finish pending truncate */
2613
2646
while (ci -> i_truncate_pending ) {
2614
2647
spin_unlock (& ci -> i_ceph_lock );
@@ -2719,6 +2752,9 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
2719
2752
ceph_cap_string (have ), ceph_cap_string (need ));
2720
2753
}
2721
2754
out_unlock :
2755
+
2756
+ __ceph_touch_fmode (ci , mdsc , flags );
2757
+
2722
2758
spin_unlock (& ci -> i_ceph_lock );
2723
2759
if (snap_rwsem_locked )
2724
2760
up_read (& mdsc -> snap_rwsem );
@@ -2756,10 +2792,20 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2756
2792
ceph_check_caps (ci , CHECK_CAPS_AUTHONLY , NULL );
2757
2793
}
2758
2794
2795
+ static inline int get_used_fmode (int caps )
2796
+ {
2797
+ int fmode = 0 ;
2798
+ if (caps & CEPH_CAP_FILE_RD )
2799
+ fmode |= CEPH_FILE_MODE_RD ;
2800
+ if (caps & CEPH_CAP_FILE_WR )
2801
+ fmode |= CEPH_FILE_MODE_WR ;
2802
+ return fmode ;
2803
+ }
2804
+
2759
2805
int ceph_try_get_caps (struct inode * inode , int need , int want ,
2760
2806
bool nonblock , int * got )
2761
2807
{
2762
- int ret ;
2808
+ int ret , flags ;
2763
2809
2764
2810
BUG_ON (need & ~CEPH_CAP_FILE_RD );
2765
2811
BUG_ON (want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
@@ -2771,8 +2817,11 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
2771
2817
return ret ;
2772
2818
}
2773
2819
2774
- ret = try_get_cap_refs (inode , need , want , 0 ,
2775
- (nonblock ? NON_BLOCKING : 0 ), got );
2820
+ flags = get_used_fmode (need | want );
2821
+ if (nonblock )
2822
+ flags |= NON_BLOCKING ;
2823
+
2824
+ ret = try_get_cap_refs (inode , need , want , 0 , flags , got );
2776
2825
return ret == - EAGAIN ? 0 : ret ;
2777
2826
}
2778
2827
@@ -2798,11 +2847,15 @@ int ceph_get_caps(struct file *filp, int need, int want,
2798
2847
fi -> filp_gen != READ_ONCE (fsc -> filp_gen ))
2799
2848
return - EBADF ;
2800
2849
2850
+ flags = get_used_fmode (need | want );
2851
+
2801
2852
while (true) {
2802
2853
if (endoff > 0 )
2803
2854
check_max_size (inode , endoff );
2804
2855
2805
- flags = atomic_read (& fi -> num_locks ) ? CHECK_FILELOCK : 0 ;
2856
+ flags &= CEPH_FILE_MODE_MASK ;
2857
+ if (atomic_read (& fi -> num_locks ))
2858
+ flags |= CHECK_FILELOCK ;
2806
2859
_got = 0 ;
2807
2860
ret = try_get_cap_refs (inode , need , want , endoff ,
2808
2861
flags , & _got );
@@ -2822,6 +2875,8 @@ int ceph_get_caps(struct file *filp, int need, int want,
2822
2875
list_add (& cw .list , & mdsc -> cap_wait_list );
2823
2876
spin_unlock (& mdsc -> caps_list_lock );
2824
2877
2878
+ /* make sure the used fmode does not time out while we wait */
2879
+ ceph_get_fmode (ci , flags , FMODE_WAIT_BIAS );
2825
2880
add_wait_queue (& ci -> i_cap_wq , & wait );
2826
2881
2827
2882
flags |= NON_BLOCKING ;
@@ -2835,6 +2890,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
2835
2890
}
2836
2891
2837
2892
remove_wait_queue (& ci -> i_cap_wq , & wait );
2893
+ ceph_put_fmode (ci , flags , FMODE_WAIT_BIAS );
2838
2894
2839
2895
spin_lock (& mdsc -> caps_list_lock );
2840
2896
list_del (& cw .list );
@@ -2854,7 +2910,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
2854
2910
if (ret < 0 ) {
2855
2911
if (ret == - ESTALE ) {
2856
2912
/* session was killed, try renew caps */
2857
- ret = ceph_renew_caps (inode );
2913
+ ret = ceph_renew_caps (inode , flags );
2858
2914
if (ret == 0 )
2859
2915
continue ;
2860
2916
}
@@ -4153,6 +4209,33 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
4153
4209
dout ("flush_dirty_caps done\n" );
4154
4210
}
4155
4211
4212
+ void __ceph_touch_fmode (struct ceph_inode_info * ci ,
4213
+ struct ceph_mds_client * mdsc , int fmode )
4214
+ {
4215
+ unsigned long now = jiffies ;
4216
+ if (fmode & CEPH_FILE_MODE_RD )
4217
+ ci -> i_last_rd = now ;
4218
+ if (fmode & CEPH_FILE_MODE_WR )
4219
+ ci -> i_last_wr = now ;
4220
+ /* queue periodic check */
4221
+ if (fmode &&
4222
+ __ceph_is_any_real_caps (ci ) &&
4223
+ list_empty (& ci -> i_cap_delay_list ))
4224
+ __cap_delay_requeue (mdsc , ci , true);
4225
+ }
4226
+
4227
+ void ceph_get_fmode (struct ceph_inode_info * ci , int fmode , int count )
4228
+ {
4229
+ int i ;
4230
+ int bits = (fmode << 1 ) | 1 ;
4231
+ spin_lock (& ci -> i_ceph_lock );
4232
+ for (i = 0 ; i < CEPH_FILE_MODE_BITS ; i ++ ) {
4233
+ if (bits & (1 << i ))
4234
+ ci -> i_nr_by_mode [i ] += count ;
4235
+ }
4236
+ spin_unlock (& ci -> i_ceph_lock );
4237
+ }
4238
+
4156
4239
void __ceph_get_fmode (struct ceph_inode_info * ci , int fmode )
4157
4240
{
4158
4241
int i ;
@@ -4168,26 +4251,18 @@ void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
4168
4251
* we may need to release capabilities to the MDS (or schedule
4169
4252
* their delayed release).
4170
4253
*/
4171
- void ceph_put_fmode (struct ceph_inode_info * ci , int fmode )
4254
+ void ceph_put_fmode (struct ceph_inode_info * ci , int fmode , int count )
4172
4255
{
4173
- int i , last = 0 ;
4256
+ int i ;
4174
4257
int bits = (fmode << 1 ) | 1 ;
4175
4258
spin_lock (& ci -> i_ceph_lock );
4176
4259
for (i = 0 ; i < CEPH_FILE_MODE_BITS ; i ++ ) {
4177
4260
if (bits & (1 << i )) {
4178
- BUG_ON (ci -> i_nr_by_mode [i ] == 0 );
4179
- if (-- ci -> i_nr_by_mode [i ] == 0 )
4180
- last ++ ;
4261
+ BUG_ON (ci -> i_nr_by_mode [i ] < count );
4262
+ ci -> i_nr_by_mode [i ] -= count ;
4181
4263
}
4182
4264
}
4183
- dout ("put_fmode %p fmode %d {%d,%d,%d,%d}\n" ,
4184
- & ci -> vfs_inode , fmode ,
4185
- ci -> i_nr_by_mode [0 ], ci -> i_nr_by_mode [1 ],
4186
- ci -> i_nr_by_mode [2 ], ci -> i_nr_by_mode [3 ]);
4187
4265
spin_unlock (& ci -> i_ceph_lock );
4188
-
4189
- if (last && ci -> i_vino .snap == CEPH_NOSNAP )
4190
- ceph_check_caps (ci , 0 , NULL );
4191
4266
}
4192
4267
4193
4268
/*
0 commit comments