Skip to content

Commit 6d29d7f

Browse files
committed
Merge tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever: "We introduce 'courteous server' in this release. Previously, NFSD would purge open and lock state for an unresponsive client after one lease period (typically 90 seconds). Now, after one lease period, another client can open and lock those files, in which case the unresponsive client's lease is purged; otherwise, if the unresponsive client's open and lock state is uncontended, the server retains that open and lock state for up to 24 hours, allowing the client's workload to resume after a lengthy network partition. A longstanding issue with NFSv4 file creation is also addressed. Previously, a file creation could fail internally, returning an error to the client, but leave the newly created file in place as an artifact. The file creation code path has been reorganized so that internal failures and race conditions are less likely to result in an unwanted file creation. A fault injector has been added to help exercise paths that are run during kernel metadata cache invalidation. These caches contain information maintained by user space about exported filesystems. Many of our test workloads do not trigger cache invalidation.
There is one patch that is needed to support PREEMPT_RT and a fix for an ancient 'sleep while spin-locked' splat that seems to have become easier to hit since v5.18-rc3."
* tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (36 commits)
NFSD: nfsd_file_put() can sleep
NFSD: Add documenting comment for nfsd4_release_lockowner()
NFSD: Modernize nfsd4_release_lockowner()
NFSD: Fix possible sleep during nfsd4_release_lockowner()
nfsd: destroy percpu stats counters after reply cache shutdown
nfsd: Fix null-ptr-deref in nfsd_fill_super()
nfsd: Unregister the cld notifier when laundry_wq create failed
SUNRPC: Use RMW bitops in single-threaded hot paths
NFSD: Clean up the show_nf_flags() macro
NFSD: Trace filecache opens
NFSD: Move documenting comment for nfsd4_process_open2()
NFSD: Fix whitespace
NFSD: Remove dprintk call sites from tail of nfsd4_open()
NFSD: Instantiate a struct file when creating a regular NFSv4 file
NFSD: Clean up nfsd_open_verified()
NFSD: Remove do_nfsd_create()
NFSD: Refactor NFSv4 OPEN(CREATE)
NFSD: Refactor NFSv3 CREATE
NFSD: Refactor nfsd_create_setattr()
NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
...
2 parents 7f50d4d + 08af54b commit 6d29d7f

File tree

30 files changed

+985
-426
lines changed

30 files changed

+985
-426
lines changed

Documentation/filesystems/locking.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ prototypes::
434434
void (*lm_break)(struct file_lock *); /* break_lease callback */
435435
int (*lm_change)(struct file_lock **, int);
436436
bool (*lm_breaker_owns_lease)(struct file_lock *);
437+
bool (*lm_lock_expirable)(struct file_lock *);
438+
void (*lm_expire_lock)(void);
437439

438440
locking rules:
439441

@@ -445,6 +447,8 @@ lm_grant: no no no
445447
lm_break: yes no no
446448
lm_change yes no no
447449
lm_breaker_owns_lease: yes no no
450+
lm_lock_expirable yes no no
451+
lm_expire_lock no no yes
448452
====================== ============= ================= =========
449453

450454
buffer_head

fs/locks.c

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,34 @@ void locks_release_private(struct file_lock *fl)
300300
}
301301
EXPORT_SYMBOL_GPL(locks_release_private);
302302

303+
/**
304+
* locks_owner_has_blockers - Check for blocking lock requests
305+
* @flctx: file lock context
306+
* @owner: lock owner
307+
*
308+
* Return values:
309+
* %true: @owner has at least one blocker
310+
* %false: @owner has no blockers
311+
*/
312+
bool locks_owner_has_blockers(struct file_lock_context *flctx,
313+
fl_owner_t owner)
314+
{
315+
struct file_lock *fl;
316+
317+
spin_lock(&flctx->flc_lock);
318+
list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
319+
if (fl->fl_owner != owner)
320+
continue;
321+
if (!list_empty(&fl->fl_blocked_requests)) {
322+
spin_unlock(&flctx->flc_lock);
323+
return true;
324+
}
325+
}
326+
spin_unlock(&flctx->flc_lock);
327+
return false;
328+
}
329+
EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
330+
303331
/* Free a lock which is not in use. */
304332
void locks_free_lock(struct file_lock *fl)
305333
{
@@ -874,19 +902,32 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
874902
struct file_lock *cfl;
875903
struct file_lock_context *ctx;
876904
struct inode *inode = locks_inode(filp);
905+
void *owner;
906+
void (*func)(void);
877907

878908
ctx = smp_load_acquire(&inode->i_flctx);
879909
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
880910
fl->fl_type = F_UNLCK;
881911
return;
882912
}
883913

914+
retry:
884915
spin_lock(&ctx->flc_lock);
885916
list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
886-
if (posix_locks_conflict(fl, cfl)) {
887-
locks_copy_conflock(fl, cfl);
888-
goto out;
917+
if (!posix_locks_conflict(fl, cfl))
918+
continue;
919+
if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
920+
&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
921+
owner = cfl->fl_lmops->lm_mod_owner;
922+
func = cfl->fl_lmops->lm_expire_lock;
923+
__module_get(owner);
924+
spin_unlock(&ctx->flc_lock);
925+
(*func)();
926+
module_put(owner);
927+
goto retry;
889928
}
929+
locks_copy_conflock(fl, cfl);
930+
goto out;
890931
}
891932
fl->fl_type = F_UNLCK;
892933
out:
@@ -1060,6 +1101,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
10601101
int error;
10611102
bool added = false;
10621103
LIST_HEAD(dispose);
1104+
void *owner;
1105+
void (*func)(void);
10631106

10641107
ctx = locks_get_lock_context(inode, request->fl_type);
10651108
if (!ctx)
@@ -1078,6 +1121,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
10781121
new_fl2 = locks_alloc_lock();
10791122
}
10801123

1124+
retry:
10811125
percpu_down_read(&file_rwsem);
10821126
spin_lock(&ctx->flc_lock);
10831127
/*
@@ -1089,6 +1133,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
10891133
list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
10901134
if (!posix_locks_conflict(request, fl))
10911135
continue;
1136+
if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
1137+
&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
1138+
owner = fl->fl_lmops->lm_mod_owner;
1139+
func = fl->fl_lmops->lm_expire_lock;
1140+
__module_get(owner);
1141+
spin_unlock(&ctx->flc_lock);
1142+
percpu_up_read(&file_rwsem);
1143+
(*func)();
1144+
module_put(owner);
1145+
goto retry;
1146+
}
10921147
if (conflock)
10931148
locks_copy_conflock(conflock, fl);
10941149
error = -EAGAIN;

fs/nfsd/filecache.c

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
303303
void
304304
nfsd_file_put(struct nfsd_file *nf)
305305
{
306+
might_sleep();
307+
306308
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
307309
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
308310
nfsd_file_flush(nf);
@@ -899,9 +901,9 @@ nfsd_file_is_cached(struct inode *inode)
899901
return ret;
900902
}
901903

902-
__be32
903-
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
904-
unsigned int may_flags, struct nfsd_file **pnf)
904+
static __be32
905+
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
906+
unsigned int may_flags, struct nfsd_file **pnf, bool open)
905907
{
906908
__be32 status;
907909
struct net *net = SVC_NET(rqstp);
@@ -996,10 +998,14 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
996998
nfsd_file_gc();
997999

9981000
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
999-
if (nf->nf_mark)
1000-
status = nfsd_open_verified(rqstp, fhp, S_IFREG,
1001-
may_flags, &nf->nf_file);
1002-
else
1001+
if (nf->nf_mark) {
1002+
if (open) {
1003+
status = nfsd_open_verified(rqstp, fhp, may_flags,
1004+
&nf->nf_file);
1005+
trace_nfsd_file_open(nf, status);
1006+
} else
1007+
status = nfs_ok;
1008+
} else
10031009
status = nfserr_jukebox;
10041010
/*
10051011
* If construction failed, or we raced with a call to unlink()
@@ -1019,6 +1025,40 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
10191025
goto out;
10201026
}
10211027

1028+
/**
1029+
* nfsd_file_acquire - Get a struct nfsd_file with an open file
1030+
* @rqstp: the RPC transaction being executed
1031+
* @fhp: the NFS filehandle of the file to be opened
1032+
* @may_flags: NFSD_MAY_ settings for the file
1033+
* @pnf: OUT: new or found "struct nfsd_file" object
1034+
*
1035+
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
1036+
* network byte order is returned.
1037+
*/
1038+
__be32
1039+
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
1040+
unsigned int may_flags, struct nfsd_file **pnf)
1041+
{
1042+
return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
1043+
}
1044+
1045+
/**
1046+
* nfsd_file_create - Get a struct nfsd_file, do not open
1047+
* @rqstp: the RPC transaction being executed
1048+
* @fhp: the NFS filehandle of the file just created
1049+
* @may_flags: NFSD_MAY_ settings for the file
1050+
* @pnf: OUT: new or found "struct nfsd_file" object
1051+
*
1052+
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
1053+
* network byte order is returned.
1054+
*/
1055+
__be32
1056+
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1057+
unsigned int may_flags, struct nfsd_file **pnf)
1058+
{
1059+
return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
1060+
}
1061+
10221062
/*
10231063
* Note that fields may be added, removed or reordered in the future. Programs
10241064
* scraping this file for info should test the labels to ensure they're

fs/nfsd/filecache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,5 +59,7 @@ void nfsd_file_close_inode_sync(struct inode *inode);
5959
bool nfsd_file_is_cached(struct inode *inode);
6060
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
6161
unsigned int may_flags, struct nfsd_file **nfp);
62+
__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
63+
unsigned int may_flags, struct nfsd_file **nfp);
6264
int nfsd_file_cache_stats_open(struct inode *, struct file *);
6365
#endif /* _FS_NFSD_FILECACHE_H */

fs/nfsd/nfs3proc.c

Lines changed: 122 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/fs.h>
99
#include <linux/ext2_fs.h>
1010
#include <linux/magic.h>
11+
#include <linux/namei.h>
1112

1213
#include "cache.h"
1314
#include "xdr3.h"
@@ -220,17 +221,132 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
220221
}
221222

222223
/*
223-
* With NFSv3, CREATE processing is a lot easier than with NFSv2.
224-
* At least in theory; we'll see how it fares in practice when the
225-
* first reports about SunOS compatibility problems start to pour in...
224+
* Implement NFSv3's unchecked, guarded, and exclusive CREATE
225+
* semantics for regular files. Except for the created file,
226+
* this operation is stateless on the server.
227+
*
228+
* Upon return, caller must release @fhp and @resfhp.
226229
*/
230+
static __be32
231+
nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
232+
struct svc_fh *resfhp, struct nfsd3_createargs *argp)
233+
{
234+
struct iattr *iap = &argp->attrs;
235+
struct dentry *parent, *child;
236+
__u32 v_mtime, v_atime;
237+
struct inode *inode;
238+
__be32 status;
239+
int host_err;
240+
241+
if (isdotent(argp->name, argp->len))
242+
return nfserr_exist;
243+
if (!(iap->ia_valid & ATTR_MODE))
244+
iap->ia_mode = 0;
245+
246+
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
247+
if (status != nfs_ok)
248+
return status;
249+
250+
parent = fhp->fh_dentry;
251+
inode = d_inode(parent);
252+
253+
host_err = fh_want_write(fhp);
254+
if (host_err)
255+
return nfserrno(host_err);
256+
257+
fh_lock_nested(fhp, I_MUTEX_PARENT);
258+
259+
child = lookup_one_len(argp->name, parent, argp->len);
260+
if (IS_ERR(child)) {
261+
status = nfserrno(PTR_ERR(child));
262+
goto out;
263+
}
264+
265+
if (d_really_is_negative(child)) {
266+
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
267+
if (status != nfs_ok)
268+
goto out;
269+
}
270+
271+
status = fh_compose(resfhp, fhp->fh_export, child, fhp);
272+
if (status != nfs_ok)
273+
goto out;
274+
275+
v_mtime = 0;
276+
v_atime = 0;
277+
if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
278+
u32 *verifier = (u32 *)argp->verf;
279+
280+
/*
281+
* Solaris 7 gets confused (bugid 4218508) if these have
282+
* the high bit set, as do xfs filesystems without the
283+
* "bigtime" feature. So just clear the high bits.
284+
*/
285+
v_mtime = verifier[0] & 0x7fffffff;
286+
v_atime = verifier[1] & 0x7fffffff;
287+
}
288+
289+
if (d_really_is_positive(child)) {
290+
status = nfs_ok;
291+
292+
switch (argp->createmode) {
293+
case NFS3_CREATE_UNCHECKED:
294+
if (!d_is_reg(child))
295+
break;
296+
iap->ia_valid &= ATTR_SIZE;
297+
goto set_attr;
298+
case NFS3_CREATE_GUARDED:
299+
status = nfserr_exist;
300+
break;
301+
case NFS3_CREATE_EXCLUSIVE:
302+
if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
303+
d_inode(child)->i_atime.tv_sec == v_atime &&
304+
d_inode(child)->i_size == 0) {
305+
break;
306+
}
307+
status = nfserr_exist;
308+
}
309+
goto out;
310+
}
311+
312+
if (!IS_POSIXACL(inode))
313+
iap->ia_mode &= ~current_umask();
314+
315+
host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
316+
if (host_err < 0) {
317+
status = nfserrno(host_err);
318+
goto out;
319+
}
320+
321+
/* A newly created file already has a file size of zero. */
322+
if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
323+
iap->ia_valid &= ~ATTR_SIZE;
324+
if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
325+
iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
326+
ATTR_MTIME_SET | ATTR_ATIME_SET;
327+
iap->ia_mtime.tv_sec = v_mtime;
328+
iap->ia_atime.tv_sec = v_atime;
329+
iap->ia_mtime.tv_nsec = 0;
330+
iap->ia_atime.tv_nsec = 0;
331+
}
332+
333+
set_attr:
334+
status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
335+
336+
out:
337+
fh_unlock(fhp);
338+
if (child && !IS_ERR(child))
339+
dput(child);
340+
fh_drop_write(fhp);
341+
return status;
342+
}
343+
227344
static __be32
228345
nfsd3_proc_create(struct svc_rqst *rqstp)
229346
{
230347
struct nfsd3_createargs *argp = rqstp->rq_argp;
231348
struct nfsd3_diropres *resp = rqstp->rq_resp;
232-
svc_fh *dirfhp, *newfhp = NULL;
233-
struct iattr *attr;
349+
svc_fh *dirfhp, *newfhp;
234350

235351
dprintk("nfsd: CREATE(3) %s %.*s\n",
236352
SVCFH_fmt(&argp->fh),
@@ -239,21 +355,8 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
239355

240356
dirfhp = fh_copy(&resp->dirfh, &argp->fh);
241357
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
242-
attr = &argp->attrs;
243-
244-
/* Unfudge the mode bits */
245-
attr->ia_mode &= ~S_IFMT;
246-
if (!(attr->ia_valid & ATTR_MODE)) {
247-
attr->ia_valid |= ATTR_MODE;
248-
attr->ia_mode = S_IFREG;
249-
} else {
250-
attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
251-
}
252358

253-
/* Now create the file and set attributes */
254-
resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
255-
attr, newfhp, argp->createmode,
256-
(u32 *)argp->verf, NULL, NULL);
359+
resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
257360
return rpc_success;
258361
}
259362

0 commit comments

Comments
 (0)