25
25
26
26
/*
 * Callback type used to punch out a byte range of @inode, starting at
 * @offset and covering @length bytes. The caller visible in this file
 * treats a non-zero return value as an error and propagates it.
 */
typedef int (* iomap_punch_t )(struct inode * inode , loff_t offset , loff_t length );
27
27
/*
28
- * Structure allocated for each folio to track per-block uptodate state
28
+ * Structure allocated for each folio to track per-block uptodate, dirty state
29
29
* and I/O completions.
30
30
*/
31
31
struct iomap_folio_state {
32
32
atomic_t read_bytes_pending ;
33
33
atomic_t write_bytes_pending ;
34
34
/*
 * Taken with spin_lock_irqsave() by the range-dirty helpers around their
 * updates to the state[] bitmap.
 */
spinlock_t state_lock ;
35
+
36
+ /*
37
+ * Each block has two bits in this bitmap:
38
+ * Bits [0..blocks_per_folio) hold the uptodate status.
39
+ * Bits [blocks_per_folio..2*blocks_per_folio) hold the dirty status.
40
+ */
35
41
unsigned long state [];
36
42
};
37
43
@@ -78,6 +84,61 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
78
84
folio_mark_uptodate (folio );
79
85
}
80
86
87
/*
 * Return whether the dirty bit for block index @block of @folio is set in
 * @ifs. The dirty bits occupy the second half of ifs->state, so the bit
 * tested is @block offset by the number of blocks per folio.
 */
+ static inline bool ifs_block_is_dirty (struct folio * folio ,
88
+ struct iomap_folio_state * ifs , int block )
89
+ {
90
+ struct inode * inode = folio -> mapping -> host ;
91
+ unsigned int blks_per_folio = i_blocks_per_folio (inode , folio );
92
+
93
+ return test_bit (block + blks_per_folio , ifs -> state );
94
+ }
95
+
96
/*
 * Clear the per-block dirty bits in @ifs for every block touched by the
 * byte range [@off, @off + @len) of @folio. Byte offsets are converted to
 * block indices by shifting right by inode->i_blkbits, and the bitmap is
 * modified under ifs->state_lock with interrupt state saved and restored.
 *
 * NOTE(review): @len == 0 makes (off + len - 1) wrap when @off is 0;
 * callers appear to be expected to pass a non-zero length - confirm.
 */
+ static void ifs_clear_range_dirty (struct folio * folio ,
97
+ struct iomap_folio_state * ifs , size_t off , size_t len )
98
+ {
99
+ struct inode * inode = folio -> mapping -> host ;
100
+ unsigned int blks_per_folio = i_blocks_per_folio (inode , folio );
101
+ unsigned int first_blk = (off >> inode -> i_blkbits );
102
+ unsigned int last_blk = (off + len - 1 ) >> inode -> i_blkbits ;
103
+ unsigned int nr_blks = last_blk - first_blk + 1 ;
104
+ unsigned long flags ;
105
+
106
+ spin_lock_irqsave (& ifs -> state_lock , flags );
/* Dirty bits start at bit index blks_per_folio within ifs->state. */
107
+ bitmap_clear (ifs -> state , first_blk + blks_per_folio , nr_blks );
108
+ spin_unlock_irqrestore (& ifs -> state_lock , flags );
109
+ }
110
+
111
/*
 * Clear the per-block dirty bits covering [@off, @off + @len) of @folio.
 * Does nothing when the folio has no iomap_folio_state in folio->private.
 */
+ static void iomap_clear_range_dirty (struct folio * folio , size_t off , size_t len )
112
+ {
113
+ struct iomap_folio_state * ifs = folio -> private ;
114
+
115
+ if (ifs )
116
+ ifs_clear_range_dirty (folio , ifs , off , len );
117
+ }
118
+
119
/*
 * Set the per-block dirty bits in @ifs for every block touched by the byte
 * range [@off, @off + @len) of @folio. Byte offsets are converted to block
 * indices by shifting right by inode->i_blkbits, and the bitmap is modified
 * under ifs->state_lock with interrupt state saved and restored.
 *
 * NOTE(review): @len == 0 makes (off + len - 1) wrap when @off is 0;
 * callers appear to be expected to pass a non-zero length - confirm.
 */
+ static void ifs_set_range_dirty (struct folio * folio ,
120
+ struct iomap_folio_state * ifs , size_t off , size_t len )
121
+ {
122
+ struct inode * inode = folio -> mapping -> host ;
123
+ unsigned int blks_per_folio = i_blocks_per_folio (inode , folio );
124
+ unsigned int first_blk = (off >> inode -> i_blkbits );
125
+ unsigned int last_blk = (off + len - 1 ) >> inode -> i_blkbits ;
126
+ unsigned int nr_blks = last_blk - first_blk + 1 ;
127
+ unsigned long flags ;
128
+
129
+ spin_lock_irqsave (& ifs -> state_lock , flags );
/* Dirty bits start at bit index blks_per_folio within ifs->state. */
130
+ bitmap_set (ifs -> state , first_blk + blks_per_folio , nr_blks );
131
+ spin_unlock_irqrestore (& ifs -> state_lock , flags );
132
+ }
133
+
134
/*
 * Set the per-block dirty bits covering [@off, @off + @len) of @folio.
 * Does nothing when the folio has no iomap_folio_state in folio->private.
 */
+ static void iomap_set_range_dirty (struct folio * folio , size_t off , size_t len )
135
+ {
136
+ struct iomap_folio_state * ifs = folio -> private ;
137
+
138
+ if (ifs )
139
+ ifs_set_range_dirty (folio , ifs , off , len );
140
+ }
141
+
81
142
static struct iomap_folio_state * ifs_alloc (struct inode * inode ,
82
143
struct folio * folio , unsigned int flags )
83
144
{
@@ -93,14 +154,24 @@ static struct iomap_folio_state *ifs_alloc(struct inode *inode,
93
154
else
94
155
gfp = GFP_NOFS | __GFP_NOFAIL ;
95
156
96
- ifs = kzalloc (struct_size (ifs , state , BITS_TO_LONGS (nr_blocks )),
97
- gfp );
98
- if (ifs ) {
99
- spin_lock_init (& ifs -> state_lock );
100
- if (folio_test_uptodate (folio ))
101
- bitmap_fill (ifs -> state , nr_blocks );
102
- folio_attach_private (folio , ifs );
103
- }
157
+ /*
158
+ * ifs->state tracks two sets of state flags when the
159
+ * filesystem block size is smaller than the folio size.
160
+ * The first state tracks per-block uptodate and the
161
+ * second tracks per-block dirty state.
162
+ */
163
+ ifs = kzalloc (struct_size (ifs , state ,
164
+ BITS_TO_LONGS (2 * nr_blocks )), gfp );
165
+ if (!ifs )
166
+ return ifs ;
167
+
168
+ spin_lock_init (& ifs -> state_lock );
169
+ if (folio_test_uptodate (folio ))
170
+ bitmap_set (ifs -> state , 0 , nr_blocks );
171
+ if (folio_test_dirty (folio ))
172
+ bitmap_set (ifs -> state , nr_blocks , nr_blocks );
173
+ folio_attach_private (folio , ifs );
174
+
104
175
return ifs ;
105
176
}
106
177
@@ -519,6 +590,17 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
519
590
}
520
591
EXPORT_SYMBOL_GPL (iomap_invalidate_folio );
521
592
593
/*
 * Mark all of @folio dirty, both in the per-block dirty bitmap and at the
 * folio level.
 *
 * ifs_alloc() is called first and its return value is ignored;
 * iomap_set_range_dirty() re-reads folio->private and is a no-op when no
 * state is attached. The return value is whatever filemap_dirty_folio()
 * returns for @mapping and @folio.
 */
+ bool iomap_dirty_folio (struct address_space * mapping , struct folio * folio )
594
+ {
595
+ struct inode * inode = mapping -> host ;
596
+ size_t len = folio_size (folio );
597
+
598
+ ifs_alloc (inode , folio , 0 );
599
+ iomap_set_range_dirty (folio , 0 , len );
600
+ return filemap_dirty_folio (mapping , folio );
601
+ }
602
+ EXPORT_SYMBOL_GPL (iomap_dirty_folio );
603
+
522
604
static void
523
605
iomap_write_failed (struct inode * inode , loff_t pos , unsigned len )
524
606
{
@@ -723,6 +805,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
723
805
if (unlikely (copied < len && !folio_test_uptodate (folio )))
724
806
return 0 ;
725
807
iomap_set_range_uptodate (folio , offset_in_folio (folio , pos ), len );
808
+ iomap_set_range_dirty (folio , offset_in_folio (folio , pos ), copied );
726
809
filemap_dirty_folio (inode -> i_mapping , folio );
727
810
return copied ;
728
811
}
@@ -892,6 +975,43 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
892
975
}
893
976
EXPORT_SYMBOL_GPL (iomap_file_buffered_write );
894
977
978
/*
 * Call @punch once per block for every non-dirty block of @folio that lies
 * within the byte range [@start_byte, @end_byte). The end of the range is
 * clamped to the last byte of the folio.
 *
 * Returns 0 when the folio has no iomap_folio_state or when every @punch
 * call returned 0; otherwise returns the first non-zero value from @punch
 * and stops iterating.
 */
+ static int iomap_write_delalloc_ifs_punch (struct inode * inode ,
979
+ struct folio * folio , loff_t start_byte , loff_t end_byte ,
980
+ iomap_punch_t punch )
981
+ {
982
+ unsigned int first_blk , last_blk , i ;
983
+ loff_t last_byte ;
984
+ u8 blkbits = inode -> i_blkbits ;
985
+ struct iomap_folio_state * ifs ;
986
+ int ret = 0 ;
987
+
988
+ /*
989
+ * When we have per-block dirty tracking, there can be
990
+ * blocks within a folio which are marked uptodate
991
+ * but not dirty. In that case it is necessary to punch
992
+ * out such blocks to avoid leaking any delalloc blocks.
993
+ */
994
+ ifs = folio -> private ;
995
+ if (!ifs )
996
+ return ret ;
997
+
/* Inclusive last byte to examine, bounded by the end of this folio. */
998
+ last_byte = min_t (loff_t , end_byte - 1 ,
999
+ folio_pos (folio ) + folio_size (folio ) - 1 );
1000
+ first_blk = offset_in_folio (folio , start_byte ) >> blkbits ;
1001
+ last_blk = offset_in_folio (folio , last_byte ) >> blkbits ;
1002
+ for (i = first_blk ; i <= last_blk ; i ++ ) {
1003
+ if (!ifs_block_is_dirty (folio , ifs , i )) {
/* Punch exactly one block: file position of block i, one block long. */
1004
+ ret = punch (inode , folio_pos (folio ) + (i << blkbits ),
1005
+ 1 << blkbits );
1006
+ if (ret )
1007
+ return ret ;
1008
+ }
1009
+ }
1010
+
1011
+ return ret ;
1012
+ }
1013
+
1014
+
895
1015
static int iomap_write_delalloc_punch (struct inode * inode , struct folio * folio ,
896
1016
loff_t * punch_start_byte , loff_t start_byte , loff_t end_byte ,
897
1017
iomap_punch_t punch )
@@ -909,6 +1029,12 @@ static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
909
1029
return ret ;
910
1030
}
911
1031
1032
+ /* Punch non-dirty blocks within folio */
1033
+ ret = iomap_write_delalloc_ifs_punch (inode , folio , start_byte ,
1034
+ end_byte , punch );
1035
+ if (ret )
1036
+ return ret ;
1037
+
912
1038
/*
913
1039
* Make sure the next punch start is correctly bound to
914
1040
* the end of this data range, not the end of the folio.
@@ -1639,14 +1765,21 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
1639
1765
struct writeback_control * wbc , struct inode * inode ,
1640
1766
struct folio * folio , u64 end_pos )
1641
1767
{
1642
- struct iomap_folio_state * ifs = ifs_alloc ( inode , folio , 0 ) ;
1768
+ struct iomap_folio_state * ifs = folio -> private ;
1643
1769
struct iomap_ioend * ioend , * next ;
1644
1770
unsigned len = i_blocksize (inode );
1645
1771
unsigned nblocks = i_blocks_per_folio (inode , folio );
1646
1772
u64 pos = folio_pos (folio );
1647
1773
int error = 0 , count = 0 , i ;
1648
1774
LIST_HEAD (submit_list );
1649
1775
1776
+ WARN_ON_ONCE (end_pos <= pos );
1777
+
1778
+ if (!ifs && nblocks > 1 ) {
1779
+ ifs = ifs_alloc (inode , folio , 0 );
1780
+ iomap_set_range_dirty (folio , 0 , end_pos - pos );
1781
+ }
1782
+
1650
1783
WARN_ON_ONCE (ifs && atomic_read (& ifs -> write_bytes_pending ) != 0 );
1651
1784
1652
1785
/*
@@ -1655,7 +1788,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
1655
1788
* invalid, grab a new one.
1656
1789
*/
1657
1790
for (i = 0 ; i < nblocks && pos < end_pos ; i ++ , pos += len ) {
1658
- if (ifs && !ifs_block_is_uptodate ( ifs , i ))
1791
+ if (ifs && !ifs_block_is_dirty ( folio , ifs , i ))
1659
1792
continue ;
1660
1793
1661
1794
error = wpc -> ops -> map_blocks (wpc , inode , pos );
@@ -1699,6 +1832,12 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
1699
1832
}
1700
1833
}
1701
1834
1835
+ /*
1836
+ * We can have dirty bits set past end of file in page_mkwrite path
1837
+ * while mapping the last partial folio. Hence it's better to clear
1838
+ * all the dirty bits in the folio here.
1839
+ */
1840
+ iomap_clear_range_dirty (folio , 0 , folio_size (folio ));
1702
1841
folio_start_writeback (folio );
1703
1842
folio_unlock (folio );
1704
1843
0 commit comments