Skip to content

Commit c8cc881

Browse files
riteshharjani authored and tytso committed
ext4: Add support for blocksize < pagesize in dioread_nolock
This patch adds support for blocksize < pagesize to the dioread_nolock feature. When blocksize < pagesize, a single page can contain multiple small buffers that are unwritten extents, so we need to maintain a vector of these unwritten extents that need conversion after the IO is complete. Thus, we maintain a list of <offset, size> pairs (io_end_vec) for this and traverse the list to do the unwritten-to-written conversion.

Signed-off-by: Ritesh Harjani <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 2943fdb commit c8cc881

File tree

4 files changed

+82
-29
lines changed

4 files changed

+82
-29
lines changed

fs/ext4/ext4.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,12 @@ struct ext4_system_blocks {
198198
*/
199199
#define EXT4_IO_END_UNWRITTEN 0x0001
200200

201+
/*
 * One <offset, size> byte range recorded against an ext4_io_end.
 * Entries are chained through 'list' onto ext4_io_end.list_vec, and each
 * range is passed to ext4_convert_unwritten_extents() when the io_end is
 * converted.
 */
struct ext4_io_end_vec {
202+
struct list_head list; /* list of io_end_vec */
203+
loff_t offset; /* offset in the file */
204+
ssize_t size; /* size of the extent */
205+
};
206+
201207
/*
202208
* For converting unwritten extents on a work queue. 'handle' is used for
203209
* buffered writeback.
@@ -211,8 +217,7 @@ typedef struct ext4_io_end {
211217
* bios covering the extent */
212218
unsigned int flag; /* unwritten or not */
213219
atomic_t count; /* reference counter */
214-
loff_t offset; /* offset in the file */
215-
ssize_t size; /* size of the extent */
220+
struct list_head list_vec; /* list of ext4_io_end_vec */
216221
} ext4_io_end_t;
217222

218223
struct ext4_io_submit {
@@ -3324,6 +3329,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
33243329
int len,
33253330
struct writeback_control *wbc,
33263331
bool keep_towrite);
3332+
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
3333+
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
33273334

33283335
/* mmp.c */
33293336
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);

fs/ext4/extents.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5005,6 +5005,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
50055005
int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
50065006
{
50075007
int ret, err = 0;
5008+
struct ext4_io_end_vec *io_end_vec;
50085009

50095010
/*
50105011
* This is somewhat ugly but the idea is clear: When transaction is
@@ -5018,8 +5019,14 @@ int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
50185019
return PTR_ERR(handle);
50195020
}
50205021

5021-
ret = ext4_convert_unwritten_extents(handle, io_end->inode,
5022-
io_end->offset, io_end->size);
5022+
list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
5023+
ret = ext4_convert_unwritten_extents(handle, io_end->inode,
5024+
io_end_vec->offset,
5025+
io_end_vec->size);
5026+
if (ret)
5027+
break;
5028+
}
5029+
50235030
if (handle)
50245031
err = ext4_journal_stop(handle);
50255032

fs/ext4/inode.c

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2364,6 +2364,9 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
23642364
ext4_lblk_t lblk = *m_lblk;
23652365
ext4_fsblk_t pblock = *m_pblk;
23662366
int err = 0;
2367+
int blkbits = mpd->inode->i_blkbits;
2368+
ssize_t io_end_size = 0;
2369+
struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
23672370

23682371
bh = head = page_buffers(page);
23692372
do {
@@ -2376,17 +2379,16 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
23762379
*/
23772380
mpd->map.m_len = 0;
23782381
mpd->map.m_flags = 0;
2382+
io_end_vec->size += io_end_size;
2383+
io_end_size = 0;
23792384

2380-
/*
2381-
* FIXME: If dioread_nolock supports
2382-
* blocksize < pagesize, we need to make
2383-
* sure we add size mapped so far to
2384-
* io_end->size as the following call
2385-
* can submit the page for IO.
2386-
*/
23872385
err = mpage_process_page_bufs(mpd, head, bh, lblk);
23882386
if (err > 0)
23892387
err = 0;
2388+
if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
2389+
io_end_vec = ext4_alloc_io_end_vec(io_end);
2390+
io_end_vec->offset = mpd->map.m_lblk << blkbits;
2391+
}
23902392
*map_bh = true;
23912393
goto out;
23922394
}
@@ -2395,13 +2397,11 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
23952397
bh->b_blocknr = pblock++;
23962398
}
23972399
clear_buffer_unwritten(bh);
2400+
io_end_size += (1 << blkbits);
23982401
} while (lblk++, (bh = bh->b_this_page) != head);
2399-
/*
2400-
* FIXME: This is going to break if dioread_nolock
2401-
* supports blocksize < pagesize as we will try to
2402-
* convert potentially unmapped parts of inode.
2403-
*/
2404-
io_end->size += PAGE_SIZE;
2402+
2403+
io_end_vec->size += io_end_size;
2404+
io_end_size = 0;
24052405
*map_bh = false;
24062406
out:
24072407
*m_lblk = lblk;
@@ -2551,9 +2551,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
25512551
int err;
25522552
loff_t disksize;
25532553
int progress = 0;
2554+
ext4_io_end_t *io_end = mpd->io_submit.io_end;
2555+
struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end);
25542556

2555-
mpd->io_submit.io_end->offset =
2556-
((loff_t)map->m_lblk) << inode->i_blkbits;
2557+
io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
25572558
do {
25582559
err = mpage_map_one_extent(handle, mpd);
25592560
if (err < 0) {
@@ -3654,6 +3655,7 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
36543655
ssize_t size, void *private)
36553656
{
36563657
ext4_io_end_t *io_end = private;
3658+
struct ext4_io_end_vec *io_end_vec;
36573659

36583660
/* if not async direct IO just return */
36593661
if (!io_end)
@@ -3671,8 +3673,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
36713673
ext4_clear_io_unwritten_flag(io_end);
36723674
size = 0;
36733675
}
3674-
io_end->offset = offset;
3675-
io_end->size = size;
3676+
io_end_vec = ext4_alloc_io_end_vec(io_end);
3677+
io_end_vec->offset = offset;
3678+
io_end_vec->size = size;
36763679
ext4_put_io_end(io_end);
36773680

36783681
return 0;

fs/ext4/page-io.c

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,56 @@
3131
#include "acl.h"
3232

3333
static struct kmem_cache *io_end_cachep;
34+
static struct kmem_cache *io_end_vec_cachep;
3435

3536
/*
 * Create the two slab caches used by this file: io_end_cachep for
 * ext4_io_end objects and io_end_vec_cachep for ext4_io_end_vec objects.
 *
 * Returns 0 on success, or -ENOMEM if either cache cannot be created.
 */
int __init ext4_init_pageio(void)
3637
{
3738
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
3839
if (io_end_cachep == NULL)
3940
return -ENOMEM;
41+
42+
io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
43+
if (io_end_vec_cachep == NULL) {
44+
/* Undo the first cache so a partial init leaves nothing behind. */
kmem_cache_destroy(io_end_cachep);
45+
return -ENOMEM;
46+
}
4047
return 0;
4148
}
4249

4350
/*
 * Destroy both slab caches set up by ext4_init_pageio():
 * io_end_cachep and io_end_vec_cachep.
 */
void ext4_exit_pageio(void)
4451
{
4552
kmem_cache_destroy(io_end_cachep);
53+
kmem_cache_destroy(io_end_vec_cachep);
54+
}
55+
56+
/*
 * Allocate a zero-filled ext4_io_end_vec from io_end_vec_cachep (GFP_NOFS)
 * and append it to the tail of io_end->list_vec.
 *
 * Returns the new entry on success. On allocation failure it returns
 * ERR_PTR(-ENOMEM), never NULL, so the result must be tested with IS_ERR()
 * rather than compared against NULL.
 *
 * NOTE(review): the callers introduced with this helper
 * (mpage_process_page, mpage_map_and_submit_extent, ext4_end_io_dio)
 * write to ->offset / ->size of the returned pointer without an IS_ERR()
 * check.
 */
struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
57+
{
58+
struct ext4_io_end_vec *io_end_vec;
59+
60+
io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
61+
if (!io_end_vec)
62+
return ERR_PTR(-ENOMEM);
63+
INIT_LIST_HEAD(&io_end_vec->list);
64+
/* Tail insertion: the newest vec is always the last list entry. */
list_add_tail(&io_end_vec->list, &io_end->list_vec);
65+
return io_end_vec;
66+
}
67+
68+
/*
 * Unlink every ext4_io_end_vec from io_end->list_vec and return it to
 * io_end_vec_cachep. Does nothing when the list is empty.
 */
static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
69+
{
70+
struct ext4_io_end_vec *io_end_vec, *tmp;
71+
72+
if (list_empty(&io_end->list_vec))
73+
return;
74+
/* Entries are removed during the walk, hence the _safe iterator. */
list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
75+
list_del(&io_end_vec->list);
76+
kmem_cache_free(io_end_vec_cachep, io_end_vec);
77+
}
78+
}
79+
80+
/*
 * Return the entry at the tail of io_end->list_vec. Because
 * ext4_alloc_io_end_vec() appends with list_add_tail(), this is the most
 * recently allocated vec.
 *
 * The list must not be empty: an empty list trips BUG_ON().
 */
struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
81+
{
82+
BUG_ON(list_empty(&io_end->list_vec));
83+
return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
4684
}
4785

4886
/*
@@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
125163
ext4_finish_bio(bio);
126164
bio_put(bio);
127165
}
166+
ext4_free_io_end_vec(io_end);
128167
kmem_cache_free(io_end_cachep, io_end);
129168
}
130169

@@ -139,8 +178,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
139178
static int ext4_end_io_end(ext4_io_end_t *io_end)
140179
{
141180
struct inode *inode = io_end->inode;
142-
loff_t offset = io_end->offset;
143-
ssize_t size = io_end->size;
144181
handle_t *handle = io_end->handle;
145182
int ret = 0;
146183

@@ -154,8 +191,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
154191
ext4_msg(inode->i_sb, KERN_EMERG,
155192
"failed to convert unwritten extents to written "
156193
"extents -- potential data loss! "
157-
"(inode %lu, offset %llu, size %zd, error %d)",
158-
inode->i_ino, offset, size, ret);
194+
"(inode %lu, error %d)", inode->i_ino, ret);
159195
}
160196
ext4_clear_io_unwritten_flag(io_end);
161197
ext4_release_io_end(io_end);
@@ -247,6 +283,7 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
247283
if (io_end) {
248284
io_end->inode = inode;
249285
INIT_LIST_HEAD(&io_end->list);
286+
INIT_LIST_HEAD(&io_end->list_vec);
250287
atomic_set(&io_end->count, 1);
251288
}
252289
return io_end;
@@ -255,7 +292,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
255292
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
256293
{
257294
if (atomic_dec_and_test(&io_end->count)) {
258-
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
295+
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
296+
list_empty(&io_end->list_vec)) {
259297
ext4_release_io_end(io_end);
260298
return;
261299
}
@@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio)
307345
struct inode *inode = io_end->inode;
308346

309347
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
310-
"(offset %llu size %ld starting block %llu)",
348+
"starting block %llu)",
311349
bio->bi_status, inode->i_ino,
312-
(unsigned long long) io_end->offset,
313-
(long) io_end->size,
314350
(unsigned long long)
315351
bi_sector >> (inode->i_blkbits - 9));
316352
mapping_set_error(inode->i_mapping,

0 commit comments

Comments
 (0)