@@ -347,10 +347,77 @@ xfs_file_splice_read(
347
347
return ret ;
348
348
}
349
349
350
+ /*
351
+ * Take care of zeroing post-EOF blocks when they might exist.
352
+ *
353
+ * Returns 0 on success, a negative error on failure, or 1 if this
354
+ * function dropped the iolock and reacquired it exclusively and the caller
355
+ * needs to restart the write sanity checks.
356
+ */
357
+ static ssize_t
358
+ xfs_file_write_zero_eof (
359
+ struct kiocb * iocb ,
360
+ struct iov_iter * from ,
361
+ unsigned int * iolock ,
362
+ size_t count ,
363
+ bool * drained_dio )
364
+ {
365
+ struct xfs_inode * ip = XFS_I (iocb -> ki_filp -> f_mapping -> host );
366
+ loff_t isize ;
367
+
368
+ /*
369
+ * We need to serialise against EOF updates that occur in IO completions
370
+ * here. We want to make sure that nobody is changing the size while
371
+ * we do this check until we have placed an IO barrier (i.e. hold
372
+ * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
373
+ * spinlock effectively forms a memory barrier once we have
374
+ * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
375
+ * hence be able to correctly determine if we need to run zeroing.
376
+ */
377
+ spin_lock (& ip -> i_flags_lock );
378
+ isize = i_size_read (VFS_I (ip ));
379
+ if (iocb -> ki_pos <= isize ) {
380
+ spin_unlock (& ip -> i_flags_lock );
381
+ return 0 ;
382
+ }
383
+ spin_unlock (& ip -> i_flags_lock );
384
+
385
+ if (iocb -> ki_flags & IOCB_NOWAIT )
386
+ return - EAGAIN ;
387
+
388
+ if (!* drained_dio ) {
389
+ /*
390
+ * If zeroing is needed and we are currently holding the iolock
391
+ * shared, we need to update it to exclusive which implies
392
+ * having to redo all checks before.
393
+ */
394
+ if (* iolock == XFS_IOLOCK_SHARED ) {
395
+ xfs_iunlock (ip , * iolock );
396
+ * iolock = XFS_IOLOCK_EXCL ;
397
+ xfs_ilock (ip , * iolock );
398
+ iov_iter_reexpand (from , count );
399
+ }
400
+
401
+ /*
402
+ * We now have an IO submission barrier in place, but AIO can do
403
+ * EOF updates during IO completion and hence we now need to
404
+ * wait for all of them to drain. Non-AIO DIO will have drained
405
+ * before we are given the XFS_IOLOCK_EXCL, and so for most
406
+ * cases this wait is a no-op.
407
+ */
408
+ inode_dio_wait (VFS_I (ip ));
409
+ * drained_dio = true;
410
+ return 1 ;
411
+ }
412
+
413
+ trace_xfs_zero_eof (ip , isize , iocb -> ki_pos - isize );
414
+ return xfs_zero_range (ip , isize , iocb -> ki_pos - isize , NULL );
415
+ }
416
+
350
417
/*
351
418
* Common pre-write limit and setup checks.
352
419
*
353
- * Called with the iolocked held either shared and exclusive according to
420
+ * Called with the iolock held either shared or exclusive according to
354
421
* @iolock, and returns with it held. Might upgrade the iolock to exclusive
355
422
* if called for a direct write beyond i_size.
356
423
*/
@@ -360,13 +427,10 @@ xfs_file_write_checks(
360
427
struct iov_iter * from ,
361
428
unsigned int * iolock )
362
429
{
363
- struct file * file = iocb -> ki_filp ;
364
- struct inode * inode = file -> f_mapping -> host ;
365
- struct xfs_inode * ip = XFS_I (inode );
366
- ssize_t error = 0 ;
430
+ struct inode * inode = iocb -> ki_filp -> f_mapping -> host ;
367
431
size_t count = iov_iter_count (from );
368
432
bool drained_dio = false;
369
- loff_t isize ;
433
+ ssize_t error ;
370
434
371
435
restart :
372
436
error = generic_write_checks (iocb , from );
@@ -389,7 +453,7 @@ xfs_file_write_checks(
389
453
* exclusively.
390
454
*/
391
455
if (* iolock == XFS_IOLOCK_SHARED && !IS_NOSEC (inode )) {
392
- xfs_iunlock (ip , * iolock );
456
+ xfs_iunlock (XFS_I ( inode ) , * iolock );
393
457
* iolock = XFS_IOLOCK_EXCL ;
394
458
error = xfs_ilock_iocb (iocb , * iolock );
395
459
if (error ) {
@@ -400,64 +464,24 @@ xfs_file_write_checks(
400
464
}
401
465
402
466
/*
403
- * If the offset is beyond the size of the file, we need to zero any
467
+ * If the offset is beyond the size of the file, we need to zero all
404
468
* blocks that fall between the existing EOF and the start of this
405
- * write. If zeroing is needed and we are currently holding the iolock
406
- * shared, we need to update it to exclusive which implies having to
407
- * redo all checks before.
408
- *
409
- * We need to serialise against EOF updates that occur in IO completions
410
- * here. We want to make sure that nobody is changing the size while we
411
- * do this check until we have placed an IO barrier (i.e. hold the
412
- * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
413
- * spinlock effectively forms a memory barrier once we have the
414
- * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
415
- * hence be able to correctly determine if we need to run zeroing.
469
+ * write.
416
470
*
417
- * We can do an unlocked check here safely as IO completion can only
418
- * extend EOF. Truncate is locked out at this point, so the EOF can
419
- * not move backwards, only forwards. Hence we only need to take the
420
- * slow path and spin locks when we are at or beyond the current EOF.
471
+ * We can do an unlocked check for i_size here safely as I/O completion
472
+ * can only extend EOF. Truncate is locked out at this point, so the
473
+ * EOF can not move backwards, only forwards. Hence we only need to take
474
+ * the slow path when the write starts beyond the current EOF.
421
475
*/
422
- if (iocb -> ki_pos <= i_size_read (inode ))
423
- goto out ;
424
-
425
- spin_lock (& ip -> i_flags_lock );
426
- isize = i_size_read (inode );
427
- if (iocb -> ki_pos > isize ) {
428
- spin_unlock (& ip -> i_flags_lock );
429
-
430
- if (iocb -> ki_flags & IOCB_NOWAIT )
431
- return - EAGAIN ;
432
-
433
- if (!drained_dio ) {
434
- if (* iolock == XFS_IOLOCK_SHARED ) {
435
- xfs_iunlock (ip , * iolock );
436
- * iolock = XFS_IOLOCK_EXCL ;
437
- xfs_ilock (ip , * iolock );
438
- iov_iter_reexpand (from , count );
439
- }
440
- /*
441
- * We now have an IO submission barrier in place, but
442
- * AIO can do EOF updates during IO completion and hence
443
- * we now need to wait for all of them to drain. Non-AIO
444
- * DIO will have drained before we are given the
445
- * XFS_IOLOCK_EXCL, and so for most cases this wait is a
446
- * no-op.
447
- */
448
- inode_dio_wait (inode );
449
- drained_dio = true;
476
+ if (iocb -> ki_pos > i_size_read (inode )) {
477
+ error = xfs_file_write_zero_eof (iocb , from , iolock , count ,
478
+ & drained_dio );
479
+ if (error == 1 )
450
480
goto restart ;
451
- }
452
-
453
- trace_xfs_zero_eof (ip , isize , iocb -> ki_pos - isize );
454
- error = xfs_zero_range (ip , isize , iocb -> ki_pos - isize , NULL );
455
481
if (error )
456
482
return error ;
457
- } else
458
- spin_unlock (& ip -> i_flags_lock );
483
+ }
459
484
460
- out :
461
485
return kiocb_modified (iocb );
462
486
}
463
487
0 commit comments