@@ -306,80 +306,38 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
306
306
}
307
307
308
308
static ssize_t ext4_handle_inode_extension (struct inode * inode , loff_t offset ,
309
- ssize_t written , size_t count )
309
+ ssize_t count )
310
310
{
311
311
handle_t * handle ;
312
- bool truncate = false;
313
- u8 blkbits = inode -> i_blkbits ;
314
- ext4_lblk_t written_blk , end_blk ;
315
- int ret ;
316
-
317
- /*
318
- * Note that EXT4_I(inode)->i_disksize can get extended up to
319
- * inode->i_size while the I/O was running due to writeback of delalloc
320
- * blocks. But, the code in ext4_iomap_alloc() is careful to use
321
- * zeroed/unwritten extents if this is possible; thus we won't leave
322
- * uninitialized blocks in a file even if we didn't succeed in writing
323
- * as much as we intended.
324
- */
325
- WARN_ON_ONCE (i_size_read (inode ) < EXT4_I (inode )-> i_disksize );
326
- if (offset + count <= EXT4_I (inode )-> i_disksize ) {
327
- /*
328
- * We need to ensure that the inode is removed from the orphan
329
- * list if it has been added prematurely, due to writeback of
330
- * delalloc blocks.
331
- */
332
- if (!list_empty (& EXT4_I (inode )-> i_orphan ) && inode -> i_nlink ) {
333
- handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
334
-
335
- if (IS_ERR (handle )) {
336
- ext4_orphan_del (NULL , inode );
337
- return PTR_ERR (handle );
338
- }
339
-
340
- ext4_orphan_del (handle , inode );
341
- ext4_journal_stop (handle );
342
- }
343
-
344
- return written ;
345
- }
346
-
347
- if (written < 0 )
348
- goto truncate ;
349
312
313
+ lockdep_assert_held_write (& inode -> i_rwsem );
350
314
handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
351
- if (IS_ERR (handle )) {
352
- written = PTR_ERR (handle );
353
- goto truncate ;
354
- }
315
+ if (IS_ERR (handle ))
316
+ return PTR_ERR (handle );
355
317
356
- if (ext4_update_inode_size (inode , offset + written )) {
357
- ret = ext4_mark_inode_dirty (handle , inode );
318
+ if (ext4_update_inode_size (inode , offset + count )) {
319
+ int ret = ext4_mark_inode_dirty (handle , inode );
358
320
if (unlikely (ret )) {
359
- written = ret ;
360
321
ext4_journal_stop (handle );
361
- goto truncate ;
322
+ return ret ;
362
323
}
363
324
}
364
325
365
- /*
366
- * We may need to truncate allocated but not written blocks beyond EOF.
367
- */
368
- written_blk = ALIGN (offset + written , 1 << blkbits );
369
- end_blk = ALIGN (offset + count , 1 << blkbits );
370
- if (written_blk < end_blk && ext4_can_truncate (inode ))
371
- truncate = true;
372
-
373
- /*
374
- * Remove the inode from the orphan list if it has been extended and
375
- * everything went OK.
376
- */
377
- if (!truncate && inode -> i_nlink )
326
+ if (inode -> i_nlink )
378
327
ext4_orphan_del (handle , inode );
379
328
ext4_journal_stop (handle );
380
329
381
- if (truncate ) {
382
- truncate :
330
+ return count ;
331
+ }
332
+
333
+ /*
334
+ * Clean up the inode after a DIO or DAX extending write has completed and the
335
+ * inode size has been updated using ext4_handle_inode_extension().
336
+ */
337
+ static void ext4_inode_extension_cleanup (struct inode * inode , ssize_t count )
338
+ {
339
+ lockdep_assert_held_write (& inode -> i_rwsem );
340
+ if (count < 0 ) {
383
341
ext4_truncate_failed_write (inode );
384
342
/*
385
343
* If the truncate operation failed early, then the inode may
@@ -388,9 +346,28 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
388
346
*/
389
347
if (inode -> i_nlink )
390
348
ext4_orphan_del (NULL , inode );
349
+ return ;
391
350
}
351
+ /*
352
+ * If i_disksize got extended due to writeback of delalloc blocks while
353
+ * the DIO was running, we could fail to clean up the orphan list in
354
+ * ext4_handle_inode_extension(). Do it now.
355
+ */
356
+ if (!list_empty (& EXT4_I (inode )-> i_orphan ) && inode -> i_nlink ) {
357
+ handle_t * handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
392
358
393
- return written ;
359
+ if (IS_ERR (handle )) {
360
+ /*
361
+ * The write has successfully completed. Not much to
362
+ * do with the error here, so just clean up the orphan
363
+ * list and hope for the best.
364
+ */
365
+ ext4_orphan_del (NULL , inode );
366
+ return ;
367
+ }
368
+ ext4_orphan_del (handle , inode );
369
+ ext4_journal_stop (handle );
370
+ }
394
371
}
395
372
396
373
static int ext4_dio_write_end_io (struct kiocb * iocb , ssize_t size ,
@@ -399,31 +376,22 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
399
376
loff_t pos = iocb -> ki_pos ;
400
377
struct inode * inode = file_inode (iocb -> ki_filp );
401
378
379
+ if (!error && size && flags & IOMAP_DIO_UNWRITTEN )
380
+ error = ext4_convert_unwritten_extents (NULL , inode , pos , size );
402
381
if (error )
403
382
return error ;
404
-
405
- if (size && flags & IOMAP_DIO_UNWRITTEN ) {
406
- error = ext4_convert_unwritten_extents (NULL , inode , pos , size );
407
- if (error < 0 )
408
- return error ;
409
- }
410
383
/*
411
- * If we are extending the file, we have to update i_size here before
412
- * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
413
- * buffered reads could zero out too much from page cache pages. Update
414
- * of on-disk size will happen later in ext4_dio_write_iter() where
415
- * we have enough information to also perform orphan list handling etc.
416
- * Note that we perform all extending writes synchronously under
417
- * i_rwsem held exclusively so i_size update is safe here in that case.
418
- * If the write was not extending, we cannot see pos > i_size here
419
- * because operations reducing i_size like truncate wait for all
420
- * outstanding DIO before updating i_size.
384
+ * Note that EXT4_I(inode)->i_disksize can get extended up to
385
+ * inode->i_size while the I/O was running due to writeback of delalloc
386
+ * blocks. But the code in ext4_iomap_alloc() is careful to use
387
+ * zeroed/unwritten extents if this is possible; thus we won't leave
388
+ * uninitialized blocks in a file even if we didn't succeed in writing
389
+ * as much as we intended.
421
390
*/
422
- pos += size ;
423
- if (pos > i_size_read (inode ))
424
- i_size_write (inode , pos );
425
-
426
- return 0 ;
391
+ WARN_ON_ONCE (i_size_read (inode ) < READ_ONCE (EXT4_I (inode )-> i_disksize ));
392
+ if (pos + size <= READ_ONCE (EXT4_I (inode )-> i_disksize ))
393
+ return size ;
394
+ return ext4_handle_inode_extension (inode , pos , size );
427
395
}
428
396
429
397
static const struct iomap_dio_ops ext4_dio_write_ops = {
@@ -608,9 +576,16 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
608
576
dio_flags , NULL , 0 );
609
577
if (ret == - ENOTBLK )
610
578
ret = 0 ;
611
-
612
- if (extend )
613
- ret = ext4_handle_inode_extension (inode , offset , ret , count );
579
+ if (extend ) {
580
+ /*
581
+ * We always perform extending DIO write synchronously so by
582
+ * now the IO is completed and ext4_handle_inode_extension()
583
+ * was called. Clean up the inode in case of error or race with
584
+ * writeback of delalloc blocks.
585
+ */
586
+ WARN_ON_ONCE (ret == - EIOCBQUEUED );
587
+ ext4_inode_extension_cleanup (inode , ret );
588
+ }
614
589
615
590
out :
616
591
if (ilock_shared )
@@ -691,8 +666,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
691
666
692
667
ret = dax_iomap_rw (iocb , from , & ext4_iomap_ops );
693
668
694
- if (extend )
695
- ret = ext4_handle_inode_extension (inode , offset , ret , count );
669
+ if (extend ) {
670
+ ret = ext4_handle_inode_extension (inode , offset , ret );
671
+ ext4_inode_extension_cleanup (inode , ret );
672
+ }
696
673
out :
697
674
inode_unlock (inode );
698
675
if (ret > 0 )
0 commit comments