@@ -1774,7 +1774,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
  */
 static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
                 struct writeback_control *wbc, struct folio *folio,
-                struct inode *inode, loff_t pos, unsigned len)
+                struct inode *inode, loff_t pos, loff_t end_pos,
+                unsigned len)
 {
         struct iomap_folio_state *ifs = folio->private;
         size_t poff = offset_in_folio(folio, pos);
@@ -1793,15 +1794,60 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,

         if (ifs)
                 atomic_add(len, &ifs->write_bytes_pending);
+
+        /*
+         * Clamp io_offset and io_size to the incore EOF so that ondisk
+         * file size updates in the ioend completion are byte-accurate.
+         * This avoids recovering files with zeroed tail regions when
+         * writeback races with appending writes:
+         *
+         * Thread 1:                                  Thread 2:
+         * ------------                               -----------
+         * write [A, A+B]
+         * update inode size to A+B
+         * submit I/O [A, A+BS]
+         *                                            write [A+B, A+B+C]
+         *                                            update inode size to A+B+C
+         * <I/O completes, updates disk size to min(A+B+C, A+BS)>
+         * <power failure>
+         *
+         * After reboot:
+         *   1) with A+B+C < A+BS, the file has zero padding in range
+         *      [A+B, A+B+C]
+         *
+         *     |<    Block Size (BS)    >|
+         *     |DDDDDDDDDDDD0000000000000|
+         *     ^           ^        ^
+         *     A          A+B     A+B+C
+         *                        (EOF)
+         *
+         *   2) with A+B+C > A+BS, the file has zero padding in range
+         *      [A+B, A+BS]
+         *
+         *     |<    Block Size (BS)    >|<    Block Size (BS)     >|
+         *     |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
+         *     ^           ^             ^            ^
+         *     A          A+B           A+BS         A+B+C
+         *                              (EOF)
+         *
+         *   D = Valid Data
+         *   0 = Zero Padding
+         *
+         * Note that this defeats the ability to chain the ioends of
+         * appending writes.
+         */
         wpc->ioend->io_size += len;
+        if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
+                wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
+
         wbc_account_cgroup_owner(wbc, folio, len);
         return 0;
 }

 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
                 struct writeback_control *wbc, struct folio *folio,
-                struct inode *inode, u64 pos, unsigned dirty_len,
-                unsigned *count)
+                struct inode *inode, u64 pos, u64 end_pos,
+                unsigned dirty_len, unsigned *count)
 {
         int error;

@@ -1826,7 +1872,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
                 break;
         default:
                 error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
-                                map_len);
+                                end_pos, map_len);
                 if (!error)
                         (*count)++;
                 break;
@@ -1897,11 +1943,11 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
                  * remaining memory is zeroed when mapped, and writes to that
                  * region are not written out to the file.
                  *
-                 * Also adjust the writeback range to skip all blocks entirely
-                 * beyond i_size.
+                 * Also adjust the end_pos to the end of file and skip writeback
+                 * for all blocks entirely beyond i_size.
                  */
                 folio_zero_segment(folio, poff, folio_size(folio));
-                *end_pos = round_up(isize, i_blocksize(inode));
+                *end_pos = isize;
         }

         return true;
@@ -1914,6 +1960,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
         struct inode *inode = folio->mapping->host;
         u64 pos = folio_pos(folio);
         u64 end_pos = pos + folio_size(folio);
+        u64 end_aligned = 0;
         unsigned count = 0;
         int error = 0;
         u32 rlen;
@@ -1955,9 +2002,10 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
         /*
          * Walk through the folio to find dirty areas to write back.
          */
-        while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
+        end_aligned = round_up(end_pos, i_blocksize(inode));
+        while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
                 error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
-                                pos, rlen, &count);
+                                pos, end_pos, rlen, &count);
                 if (error)
                         break;
                 pos += rlen;
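
For illustration, below is a minimal userspace sketch of the clamping arithmetic this patch adds. It is not kernel code: struct toy_ioend, round_up_bs() and the example values A, B and BS are made up for the sketch and only mirror the names used in the diff (io_offset, io_size, end_pos, round_up()).

/* Toy model of the io_size clamp: an appending write of B bytes is
 * written back as a full block of BS bytes, but the on-disk size
 * update must stop at the incore EOF (end_pos). */
#include <stdio.h>
#include <stdint.h>

struct toy_ioend {
        uint64_t io_offset;     /* file offset the ioend starts at */
        uint64_t io_size;       /* bytes accumulated so far */
};

/* Round x up to the next multiple of bs (bs is a power of two),
 * analogous to how end_aligned is derived from end_pos in the diff. */
static uint64_t round_up_bs(uint64_t x, uint64_t bs)
{
        return (x + bs - 1) & ~(bs - 1);
}

int main(void)
{
        uint64_t bs = 4096;             /* block size (BS) */
        uint64_t A = 0, B = 300;        /* appending write [A, A+B] */
        uint64_t end_pos = A + B;       /* incore EOF when the folio is mapped */

        struct toy_ioend ioend = { .io_offset = A, .io_size = 0 };

        /* Writeback adds a whole dirty block to the ioend... */
        ioend.io_size += bs;

        /* ...so without clamping, completion would move the on-disk size
         * to the block-aligned A+BS, leaving zero padding in [A+B, A+BS]
         * visible after a crash. */
        uint64_t unclamped = ioend.io_offset + ioend.io_size;

        /* The clamp from the patch keeps the size update byte-accurate. */
        if (ioend.io_offset + ioend.io_size > end_pos)
                ioend.io_size = end_pos - ioend.io_offset;

        printf("block-aligned end:    %llu (round_up(end_pos, bs) = %llu)\n",
               (unsigned long long)unclamped,
               (unsigned long long)round_up_bs(end_pos, bs));
        printf("clamped on-disk size: %llu (== incore EOF %llu)\n",
               (unsigned long long)(ioend.io_offset + ioend.io_size),
               (unsigned long long)end_pos);
        return 0;
}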