Skip to content

Commit c1c1204

Browse files
author
Damien Le Moal
committed
zonefs: fix zonefs_iomap_begin() for reads
If a readahead is issued to a sequential zone file with an offset exactly equal to the current file size, the iomap type is set to IOMAP_UNWRITTEN, which will prevent an IO, but the iomap length is calculated as 0. This causes a WARN_ON() in iomap_iter():

[17309.548939] WARNING: CPU: 3 PID: 2137 at fs/iomap/iter.c:34 iomap_iter+0x9cf/0xe80
[...]
[17309.650907] RIP: 0010:iomap_iter+0x9cf/0xe80
[...]
[17309.754560] Call Trace:
[17309.757078] <TASK>
[17309.759240] ? lock_is_held_type+0xd8/0x130
[17309.763531] iomap_readahead+0x1a8/0x870
[17309.767550] ? iomap_read_folio+0x4c0/0x4c0
[17309.771817] ? lockdep_hardirqs_on_prepare+0x400/0x400
[17309.778848] ? lock_release+0x370/0x750
[17309.784462] ? folio_add_lru+0x217/0x3f0
[17309.790220] ? reacquire_held_locks+0x4e0/0x4e0
[17309.796543] read_pages+0x17d/0xb60
[17309.801854] ? folio_add_lru+0x238/0x3f0
[17309.807573] ? readahead_expand+0x5f0/0x5f0
[17309.813554] ? policy_node+0xb5/0x140
[17309.819018] page_cache_ra_unbounded+0x27d/0x450
[17309.825439] filemap_get_pages+0x500/0x1450
[17309.831444] ? filemap_add_folio+0x140/0x140
[17309.837519] ? lock_is_held_type+0xd8/0x130
[17309.843509] filemap_read+0x28c/0x9f0
[17309.848953] ? zonefs_file_read_iter+0x1ea/0x4d0 [zonefs]
[17309.856162] ? trace_contention_end+0xd6/0x130
[17309.862416] ? __mutex_lock+0x221/0x1480
[17309.868151] ? zonefs_file_read_iter+0x166/0x4d0 [zonefs]
[17309.875364] ? filemap_get_pages+0x1450/0x1450
[17309.881647] ? __mutex_unlock_slowpath+0x15e/0x620
[17309.888248] ? wait_for_completion_io_timeout+0x20/0x20
[17309.895231] ? lock_is_held_type+0xd8/0x130
[17309.901115] ? lock_is_held_type+0xd8/0x130
[17309.906934] zonefs_file_read_iter+0x356/0x4d0 [zonefs]
[17309.913750] new_sync_read+0x2d8/0x520
[17309.919035] ? __x64_sys_lseek+0x1d0/0x1d0

Furthermore, this causes iomap_readahead() to loop forever as iomap_readahead_iter() always returns 0, making no progress.
Fix this by treating reads at or beyond the file size as access to holes, setting the iomap type to IOMAP_HOLE, the iomap addr to IOMAP_NULL_ADDR and using the length argument as is for the iomap length.

To simplify the code with this change, zonefs_iomap_begin() is split into the read variant, zonefs_read_iomap_begin() and zonefs_read_iomap_ops, and the write variant, zonefs_write_iomap_begin() and zonefs_write_iomap_ops.

Reported-by: Jorgen Hansen <[email protected]>
Fixes: 8dcc1a9 ("fs: New zonefs file system")
Signed-off-by: Damien Le Moal <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Johannes Thumshirn <[email protected]>
Reviewed-by: Jorgen Hansen <[email protected]>
1 parent 96eca14 commit c1c1204

File tree

1 file changed

+64
-30
lines changed

1 file changed

+64
-30
lines changed

fs/zonefs/super.c

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,51 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
110110
}
111111
}
112112

113-
static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
114-
unsigned int flags, struct iomap *iomap,
115-
struct iomap *srcmap)
113+
static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
114+
loff_t length, unsigned int flags,
115+
struct iomap *iomap, struct iomap *srcmap)
116116
{
117117
struct zonefs_inode_info *zi = ZONEFS_I(inode);
118118
struct super_block *sb = inode->i_sb;
119119
loff_t isize;
120120

121-
/* All I/Os should always be within the file maximum size */
121+
/*
122+
* All blocks are always mapped below EOF. If reading past EOF,
123+
* act as if there is a hole up to the file maximum size.
124+
*/
125+
mutex_lock(&zi->i_truncate_mutex);
126+
iomap->bdev = inode->i_sb->s_bdev;
127+
iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
128+
isize = i_size_read(inode);
129+
if (iomap->offset >= isize) {
130+
iomap->type = IOMAP_HOLE;
131+
iomap->addr = IOMAP_NULL_ADDR;
132+
iomap->length = length;
133+
} else {
134+
iomap->type = IOMAP_MAPPED;
135+
iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
136+
iomap->length = isize - iomap->offset;
137+
}
138+
mutex_unlock(&zi->i_truncate_mutex);
139+
140+
trace_zonefs_iomap_begin(inode, iomap);
141+
142+
return 0;
143+
}
144+
145+
static const struct iomap_ops zonefs_read_iomap_ops = {
146+
.iomap_begin = zonefs_read_iomap_begin,
147+
};
148+
149+
static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
150+
loff_t length, unsigned int flags,
151+
struct iomap *iomap, struct iomap *srcmap)
152+
{
153+
struct zonefs_inode_info *zi = ZONEFS_I(inode);
154+
struct super_block *sb = inode->i_sb;
155+
loff_t isize;
156+
157+
/* All write I/Os should always be within the file maximum size */
122158
if (WARN_ON_ONCE(offset + length > zi->i_max_size))
123159
return -EIO;
124160

@@ -128,7 +164,7 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
128164
* operation.
129165
*/
130166
if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
131-
(flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)))
167+
!(flags & IOMAP_DIRECT)))
132168
return -EIO;
133169

134170
/*
@@ -137,47 +173,44 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
137173
* write pointer) and unwriten beyond.
138174
*/
139175
mutex_lock(&zi->i_truncate_mutex);
176+
iomap->bdev = inode->i_sb->s_bdev;
177+
iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
178+
iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
140179
isize = i_size_read(inode);
141-
if (offset >= isize)
180+
if (iomap->offset >= isize) {
142181
iomap->type = IOMAP_UNWRITTEN;
143-
else
182+
iomap->length = zi->i_max_size - iomap->offset;
183+
} else {
144184
iomap->type = IOMAP_MAPPED;
145-
if (flags & IOMAP_WRITE)
146-
length = zi->i_max_size - offset;
147-
else
148-
length = min(length, isize - offset);
185+
iomap->length = isize - iomap->offset;
186+
}
149187
mutex_unlock(&zi->i_truncate_mutex);
150188

151-
iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
152-
iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset;
153-
iomap->bdev = inode->i_sb->s_bdev;
154-
iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
155-
156189
trace_zonefs_iomap_begin(inode, iomap);
157190

158191
return 0;
159192
}
160193

161-
static const struct iomap_ops zonefs_iomap_ops = {
162-
.iomap_begin = zonefs_iomap_begin,
194+
static const struct iomap_ops zonefs_write_iomap_ops = {
195+
.iomap_begin = zonefs_write_iomap_begin,
163196
};
164197

165198
static int zonefs_read_folio(struct file *unused, struct folio *folio)
166199
{
167-
return iomap_read_folio(folio, &zonefs_iomap_ops);
200+
return iomap_read_folio(folio, &zonefs_read_iomap_ops);
168201
}
169202

170203
static void zonefs_readahead(struct readahead_control *rac)
171204
{
172-
iomap_readahead(rac, &zonefs_iomap_ops);
205+
iomap_readahead(rac, &zonefs_read_iomap_ops);
173206
}
174207

175208
/*
176209
* Map blocks for page writeback. This is used only on conventional zone files,
177210
* which implies that the page range can only be within the fixed inode size.
178211
*/
179-
static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc,
180-
struct inode *inode, loff_t offset)
212+
static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
213+
struct inode *inode, loff_t offset)
181214
{
182215
struct zonefs_inode_info *zi = ZONEFS_I(inode);
183216

@@ -191,12 +224,12 @@ static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc,
191224
offset < wpc->iomap.offset + wpc->iomap.length)
192225
return 0;
193226

194-
return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset,
195-
IOMAP_WRITE, &wpc->iomap, NULL);
227+
return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
228+
IOMAP_WRITE, &wpc->iomap, NULL);
196229
}
197230

198231
static const struct iomap_writeback_ops zonefs_writeback_ops = {
199-
.map_blocks = zonefs_map_blocks,
232+
.map_blocks = zonefs_write_map_blocks,
200233
};
201234

202235
static int zonefs_writepage(struct page *page, struct writeback_control *wbc)
@@ -226,7 +259,8 @@ static int zonefs_swap_activate(struct swap_info_struct *sis,
226259
return -EINVAL;
227260
}
228261

229-
return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops);
262+
return iomap_swapfile_activate(sis, swap_file, span,
263+
&zonefs_read_iomap_ops);
230264
}
231265

232266
static const struct address_space_operations zonefs_file_aops = {
@@ -647,7 +681,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
647681

648682
/* Serialize against truncates */
649683
filemap_invalidate_lock_shared(inode->i_mapping);
650-
ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
684+
ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
651685
filemap_invalidate_unlock_shared(inode->i_mapping);
652686

653687
sb_end_pagefault(inode->i_sb);
@@ -899,7 +933,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
899933
if (append)
900934
ret = zonefs_file_dio_append(iocb, from);
901935
else
902-
ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
936+
ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
903937
&zonefs_write_dio_ops, 0, NULL, 0);
904938
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
905939
(ret > 0 || ret == -EIOCBQUEUED)) {
@@ -948,7 +982,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
948982
if (ret <= 0)
949983
goto inode_unlock;
950984

951-
ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
985+
ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
952986
if (ret > 0)
953987
iocb->ki_pos += ret;
954988
else if (ret == -EIO)
@@ -1041,7 +1075,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
10411075
goto inode_unlock;
10421076
}
10431077
file_accessed(iocb->ki_filp);
1044-
ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops,
1078+
ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
10451079
&zonefs_read_dio_ops, 0, NULL, 0);
10461080
} else {
10471081
ret = generic_file_read_iter(iocb, to);

0 commit comments

Comments
 (0)