@@ -347,10 +347,77 @@ xfs_file_splice_read(
 	return ret;
 }
 
+/*
+ * Take care of zeroing post-EOF blocks when they might exist.
+ *
+ * Returns 0 if successful, a negative error for a failure, or 1 if this
+ * function dropped the iolock and reacquired it exclusively and the caller
+ * needs to restart the write sanity checks.
+ */
+static ssize_t
+xfs_file_write_zero_eof(
+	struct kiocb		*iocb,
+	struct iov_iter		*from,
+	unsigned int		*iolock,
+	size_t			count,
+	bool			*drained_dio)
+{
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
+	loff_t			isize;
+
+	/*
+	 * We need to serialise against EOF updates that occur in IO completions
+	 * here. We want to make sure that nobody is changing the size while
+	 * we do this check until we have placed an IO barrier (i.e. hold
+	 * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
+	 * spinlock effectively forms a memory barrier once we have
+	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
+	 * hence be able to correctly determine if we need to run zeroing.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	isize = i_size_read(VFS_I(ip));
+	if (iocb->ki_pos <= isize) {
+		spin_unlock(&ip->i_flags_lock);
+		return 0;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EAGAIN;
+
+	if (!*drained_dio) {
+		/*
+		 * If zeroing is needed and we are currently holding the iolock
+		 * shared, we need to update it to exclusive which implies
+		 * having to redo all checks before.
+		 */
+		if (*iolock == XFS_IOLOCK_SHARED) {
+			xfs_iunlock(ip, *iolock);
+			*iolock = XFS_IOLOCK_EXCL;
+			xfs_ilock(ip, *iolock);
+			iov_iter_reexpand(from, count);
+		}
+
+		/*
+		 * We now have an IO submission barrier in place, but AIO can do
+		 * EOF updates during IO completion and hence we now need to
+		 * wait for all of them to drain. Non-AIO DIO will have drained
+		 * before we are given the XFS_IOLOCK_EXCL, and so for most
+		 * cases this wait is a no-op.
+		 */
+		inode_dio_wait(VFS_I(ip));
+		*drained_dio = true;
+		return 1;
+	}
+
+	trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+	return xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
+}
+
 /*
  * Common pre-write limit and setup checks.
  *
- * Called with the iolocked held either shared and exclusive according to
+ * Called with the iolock held either shared and exclusive according to
  * @iolock, and returns with it held. Might upgrade the iolock to exclusive
  * if called for a direct write beyond i_size.
  */
@@ -360,13 +427,10 @@ xfs_file_write_checks(
 	struct iov_iter		*from,
 	unsigned int		*iolock)
 {
-	struct file		*file = iocb->ki_filp;
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			error = 0;
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	size_t			count = iov_iter_count(from);
 	bool			drained_dio = false;
-	loff_t			isize;
+	ssize_t			error;
 
 restart:
 	error = generic_write_checks(iocb, from);
@@ -389,7 +453,7 @@ xfs_file_write_checks(
 	 * exclusively.
 	 */
 	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
-		xfs_iunlock(ip, *iolock);
+		xfs_iunlock(XFS_I(inode), *iolock);
 		*iolock = XFS_IOLOCK_EXCL;
 		error = xfs_ilock_iocb(iocb, *iolock);
 		if (error) {
@@ -400,64 +464,24 @@ xfs_file_write_checks(
 	}
 
 	/*
-	 * If the offset is beyond the size of the file, we need to zero any
+	 * If the offset is beyond the size of the file, we need to zero all
 	 * blocks that fall between the existing EOF and the start of this
-	 * write. If zeroing is needed and we are currently holding the iolock
-	 * shared, we need to update it to exclusive which implies having to
-	 * redo all checks before.
-	 *
-	 * We need to serialise against EOF updates that occur in IO completions
-	 * here. We want to make sure that nobody is changing the size while we
-	 * do this check until we have placed an IO barrier (i.e. hold the
-	 * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
-	 * spinlock effectively forms a memory barrier once we have the
-	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
-	 * hence be able to correctly determine if we need to run zeroing.
+	 * write.
 	 *
-	 * We can do an unlocked check here safely as IO completion can only
-	 * extend EOF. Truncate is locked out at this point, so the EOF can
-	 * not move backwards, only forwards. Hence we only need to take the
-	 * slow path and spin locks when we are at or beyond the current EOF.
+	 * We can do an unlocked check for i_size here safely as I/O completion
+	 * can only extend EOF. Truncate is locked out at this point, so the
+	 * EOF can not move backwards, only forwards. Hence we only need to take
+	 * the slow path when we are at or beyond the current EOF.
 	 */
-	if (iocb->ki_pos <= i_size_read(inode))
-		goto out;
-
-	spin_lock(&ip->i_flags_lock);
-	isize = i_size_read(inode);
-	if (iocb->ki_pos > isize) {
-		spin_unlock(&ip->i_flags_lock);
-
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			return -EAGAIN;
-
-		if (!drained_dio) {
-			if (*iolock == XFS_IOLOCK_SHARED) {
-				xfs_iunlock(ip, *iolock);
-				*iolock = XFS_IOLOCK_EXCL;
-				xfs_ilock(ip, *iolock);
-				iov_iter_reexpand(from, count);
-			}
-			/*
-			 * We now have an IO submission barrier in place, but
-			 * AIO can do EOF updates during IO completion and hence
-			 * we now need to wait for all of them to drain. Non-AIO
-			 * DIO will have drained before we are given the
-			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
-			 * no-op.
-			 */
-			inode_dio_wait(inode);
-			drained_dio = true;
+	if (iocb->ki_pos > i_size_read(inode)) {
+		error = xfs_file_write_zero_eof(iocb, from, iolock, count,
+				&drained_dio);
+		if (error == 1)
 			goto restart;
-		}
-
-		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
-		error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
 		if (error)
 			return error;
-	} else
-		spin_unlock(&ip->i_flags_lock);
+	}
 
-out:
 	return kiocb_modified(iocb);
 }
 
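To make the refactored control flow easier to follow outside the kernel tree, here is a minimal userspace sketch of the same tri-state convention (0 means nothing to do, a negative value means error, 1 means the lock was upgraded and the caller must restart its checks). It uses a pthread rwlock as a stand-in for the XFS iolock; the names (prepare_beyond_eof and friends) are illustrative only and are not part of the patch.

/*
 * Illustrative only: mirrors the shape of xfs_file_write_zero_eof() and its
 * caller, not the real kernel implementation.  Build with: cc -pthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Returns 0 if no post-EOF zeroing is needed, a negative value on error,
 * or 1 after upgrading to the exclusive lock so the caller restarts its
 * pre-write checks (the same convention as the new XFS helper).
 */
static int prepare_beyond_eof(long pos, long isize, bool *lock_excl,
			      bool *drained)
{
	if (pos <= isize)
		return 0;		/* write does not extend the file */

	if (!*drained) {
		if (!*lock_excl) {
			/*
			 * pthread rwlocks cannot be upgraded in place: drop
			 * the shared lock and take it exclusively, which is
			 * exactly why the caller must redo its checks.
			 */
			pthread_rwlock_unlock(&iolock);
			pthread_rwlock_wrlock(&iolock);
			*lock_excl = true;
		}
		/* stand-in for inode_dio_wait(): in-flight I/O has finished */
		*drained = true;
		return 1;		/* tell the caller to restart */
	}

	/* stand-in for zeroing the range [isize, pos) past the old EOF */
	printf("zeroing %ld bytes past EOF\n", pos - isize);
	return 0;
}

int main(void)
{
	bool lock_excl = false;
	bool drained = false;
	int error;

	pthread_rwlock_rdlock(&iolock);
restart:
	error = prepare_beyond_eof(8192, 4096, &lock_excl, &drained);
	if (error == 1)
		goto restart;
	if (error < 0)
		fprintf(stderr, "write setup failed: %d\n", error);
	pthread_rwlock_unlock(&iolock);
	return 0;
}

The restart loop in main() plays the role of the restart label in xfs_file_write_checks(): once the helper has dropped and reacquired the lock, every check done under the shared lock must be repeated under the exclusive one.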