Skip to content

Commit e41170c

Browse files
committed
Merge tag 'vfs-6.15-rc1.pagesize' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs pagesize updates from Christian Brauner:

"This enables block sizes greater than the page size for block devices. With this we can start supporting block devices with logical block sizes larger than 4k.

It also allows lifting the device cache sector size support to 64k. This allows filesystems which can use larger sector sizes up to 64k to ensure that the filesystem will not generate writes that are smaller than the specified sector size."

* tag 'vfs-6.15-rc1.pagesize' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  - bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()
  - bdev: use bdev_io_min() for statx block size
  - block/bdev: lift block size restrictions to 64k
  - block/bdev: enable large folio support for large logical block sizes
  - fs/buffer fs/mpage: remove large folio restriction
  - fs/mpage: use blocks_per_folio instead of blocks_per_page
  - fs/mpage: avoid negative shift for large blocksize
  - fs/buffer: remove batching from async read
  - fs/buffer: simplify block_read_full_folio() with bh_offset()
2 parents 130e696 + a64e5a5 commit e41170c

File tree

7 files changed

+65
-69
lines changed

7 files changed

+65
-69
lines changed

block/bdev.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ static void set_init_blocksize(struct block_device *bdev)
148148
bsize <<= 1;
149149
}
150150
BD_INODE(bdev)->i_blkbits = blksize_bits(bsize);
151+
mapping_set_folio_min_order(BD_INODE(bdev)->i_mapping,
152+
get_order(bsize));
151153
}
152154

153155
int set_blocksize(struct file *file, int size)
@@ -169,6 +171,7 @@ int set_blocksize(struct file *file, int size)
169171
if (inode->i_blkbits != blksize_bits(size)) {
170172
sync_blockdev(bdev);
171173
inode->i_blkbits = blksize_bits(size);
174+
mapping_set_folio_min_order(inode->i_mapping, get_order(size));
172175
kill_bdev(bdev);
173176
}
174177
return 0;
@@ -178,10 +181,11 @@ EXPORT_SYMBOL(set_blocksize);
178181

179182
int sb_set_blocksize(struct super_block *sb, int size)
180183
{
184+
if (!(sb->s_type->fs_flags & FS_LBS) && size > PAGE_SIZE)
185+
return 0;
181186
if (set_blocksize(sb->s_bdev_file, size))
182187
return 0;
183-
/* If we get here, we know size is power of two
184-
* and it's value is between 512 and PAGE_SIZE */
188+
/* If we get here, we know size is validated */
185189
sb->s_blocksize = size;
186190
sb->s_blocksize_bits = blksize_bits(size);
187191
return sb->s_blocksize;
@@ -1274,9 +1278,6 @@ void bdev_statx(struct path *path, struct kstat *stat,
12741278
struct inode *backing_inode;
12751279
struct block_device *bdev;
12761280

1277-
if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
1278-
return;
1279-
12801281
backing_inode = d_backing_inode(path->dentry);
12811282

12821283
/*
@@ -1303,6 +1304,8 @@ void bdev_statx(struct path *path, struct kstat *stat,
13031304
queue_atomic_write_unit_max_bytes(bd_queue));
13041305
}
13051306

1307+
stat->blksize = bdev_io_min(bdev);
1308+
13061309
blkdev_put_no_open(bdev);
13071310
}
13081311

fs/bcachefs/fs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2396,7 +2396,7 @@ static struct file_system_type bcache_fs_type = {
23962396
.name = "bcachefs",
23972397
.init_fs_context = bch2_init_fs_context,
23982398
.kill_sb = bch2_kill_sb,
2399-
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
2399+
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_LBS,
24002400
};
24012401

24022402
MODULE_ALIAS_FS("bcachefs");

fs/buffer.c

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2361,9 +2361,8 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
23612361
{
23622362
struct inode *inode = folio->mapping->host;
23632363
sector_t iblock, lblock;
2364-
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2364+
struct buffer_head *bh, *head, *prev = NULL;
23652365
size_t blocksize;
2366-
int nr, i;
23672366
int fully_mapped = 1;
23682367
bool page_error = false;
23692368
loff_t limit = i_size_read(inode);
@@ -2372,16 +2371,12 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
23722371
if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
23732372
limit = inode->i_sb->s_maxbytes;
23742373

2375-
VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
2376-
23772374
head = folio_create_buffers(folio, inode, 0);
23782375
blocksize = head->b_size;
23792376

23802377
iblock = div_u64(folio_pos(folio), blocksize);
23812378
lblock = div_u64(limit + blocksize - 1, blocksize);
23822379
bh = head;
2383-
nr = 0;
2384-
i = 0;
23852380

23862381
do {
23872382
if (buffer_uptodate(bh))
@@ -2398,7 +2393,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
23982393
page_error = true;
23992394
}
24002395
if (!buffer_mapped(bh)) {
2401-
folio_zero_range(folio, i * blocksize,
2396+
folio_zero_range(folio, bh_offset(bh),
24022397
blocksize);
24032398
if (!err)
24042399
set_buffer_uptodate(bh);
@@ -2411,40 +2406,33 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
24112406
if (buffer_uptodate(bh))
24122407
continue;
24132408
}
2414-
arr[nr++] = bh;
2415-
} while (i++, iblock++, (bh = bh->b_this_page) != head);
2416-
2417-
if (fully_mapped)
2418-
folio_set_mappedtodisk(folio);
2419-
2420-
if (!nr) {
2421-
/*
2422-
* All buffers are uptodate or get_block() returned an
2423-
* error when trying to map them - we can finish the read.
2424-
*/
2425-
folio_end_read(folio, !page_error);
2426-
return 0;
2427-
}
24282409

2429-
/* Stage two: lock the buffers */
2430-
for (i = 0; i < nr; i++) {
2431-
bh = arr[i];
24322410
lock_buffer(bh);
2411+
if (buffer_uptodate(bh)) {
2412+
unlock_buffer(bh);
2413+
continue;
2414+
}
2415+
24332416
mark_buffer_async_read(bh);
2434-
}
2417+
if (prev)
2418+
submit_bh(REQ_OP_READ, prev);
2419+
prev = bh;
2420+
} while (iblock++, (bh = bh->b_this_page) != head);
2421+
2422+
if (fully_mapped)
2423+
folio_set_mappedtodisk(folio);
24352424

24362425
/*
2437-
* Stage 3: start the IO. Check for uptodateness
2438-
* inside the buffer lock in case another process reading
2439-
* the underlying blockdev brought it uptodate (the sct fix).
2426+
* All buffers are uptodate or get_block() returned an error
2427+
* when trying to map them - we must finish the read because
2428+
* end_buffer_async_read() will never be called on any buffer
2429+
* in this folio.
24402430
*/
2441-
for (i = 0; i < nr; i++) {
2442-
bh = arr[i];
2443-
if (buffer_uptodate(bh))
2444-
end_buffer_async_read(bh, 1);
2445-
else
2446-
submit_bh(REQ_OP_READ, bh);
2447-
}
2431+
if (prev)
2432+
submit_bh(REQ_OP_READ, prev);
2433+
else
2434+
folio_end_read(folio, !page_error);
2435+
24482436
return 0;
24492437
}
24502438
EXPORT_SYMBOL(block_read_full_folio);

fs/mpage.c

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh,
107107
* don't make any buffers if there is only one buffer on
108108
* the folio and the folio just needs to be set up to date
109109
*/
110-
if (inode->i_blkbits == PAGE_SHIFT &&
110+
if (inode->i_blkbits == folio_shift(folio) &&
111111
buffer_uptodate(bh)) {
112112
folio_mark_uptodate(folio);
113113
return;
@@ -153,15 +153,15 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
153153
struct folio *folio = args->folio;
154154
struct inode *inode = folio->mapping->host;
155155
const unsigned blkbits = inode->i_blkbits;
156-
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
156+
const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
157157
const unsigned blocksize = 1 << blkbits;
158158
struct buffer_head *map_bh = &args->map_bh;
159159
sector_t block_in_file;
160160
sector_t last_block;
161161
sector_t last_block_in_file;
162162
sector_t first_block;
163163
unsigned page_block;
164-
unsigned first_hole = blocks_per_page;
164+
unsigned first_hole = blocks_per_folio;
165165
struct block_device *bdev = NULL;
166166
int length;
167167
int fully_mapped = 1;
@@ -170,9 +170,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
170170
unsigned relative_block;
171171
gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
172172

173-
/* MAX_BUF_PER_PAGE, for example */
174-
VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
175-
176173
if (args->is_readahead) {
177174
opf |= REQ_RAHEAD;
178175
gfp |= __GFP_NORETRY | __GFP_NOWARN;
@@ -181,8 +178,8 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
181178
if (folio_buffers(folio))
182179
goto confused;
183180

184-
block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
185-
last_block = block_in_file + args->nr_pages * blocks_per_page;
181+
block_in_file = folio_pos(folio) >> blkbits;
182+
last_block = block_in_file + ((args->nr_pages * PAGE_SIZE) >> blkbits);
186183
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
187184
if (last_block > last_block_in_file)
188185
last_block = last_block_in_file;
@@ -204,7 +201,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
204201
clear_buffer_mapped(map_bh);
205202
break;
206203
}
207-
if (page_block == blocks_per_page)
204+
if (page_block == blocks_per_folio)
208205
break;
209206
page_block++;
210207
block_in_file++;
@@ -216,7 +213,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
216213
* Then do more get_blocks calls until we are done with this folio.
217214
*/
218215
map_bh->b_folio = folio;
219-
while (page_block < blocks_per_page) {
216+
while (page_block < blocks_per_folio) {
220217
map_bh->b_state = 0;
221218
map_bh->b_size = 0;
222219

@@ -229,7 +226,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
229226

230227
if (!buffer_mapped(map_bh)) {
231228
fully_mapped = 0;
232-
if (first_hole == blocks_per_page)
229+
if (first_hole == blocks_per_folio)
233230
first_hole = page_block;
234231
page_block++;
235232
block_in_file++;
@@ -247,7 +244,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
247244
goto confused;
248245
}
249246

250-
if (first_hole != blocks_per_page)
247+
if (first_hole != blocks_per_folio)
251248
goto confused; /* hole -> non-hole */
252249

253250
/* Contiguous blocks? */
@@ -260,16 +257,16 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
260257
if (relative_block == nblocks) {
261258
clear_buffer_mapped(map_bh);
262259
break;
263-
} else if (page_block == blocks_per_page)
260+
} else if (page_block == blocks_per_folio)
264261
break;
265262
page_block++;
266263
block_in_file++;
267264
}
268265
bdev = map_bh->b_bdev;
269266
}
270267

271-
if (first_hole != blocks_per_page) {
272-
folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
268+
if (first_hole != blocks_per_folio) {
269+
folio_zero_segment(folio, first_hole << blkbits, folio_size(folio));
273270
if (first_hole == 0) {
274271
folio_mark_uptodate(folio);
275272
folio_unlock(folio);
@@ -303,10 +300,10 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
303300
relative_block = block_in_file - args->first_logical_block;
304301
nblocks = map_bh->b_size >> blkbits;
305302
if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
306-
(first_hole != blocks_per_page))
303+
(first_hole != blocks_per_folio))
307304
args->bio = mpage_bio_submit_read(args->bio);
308305
else
309-
args->last_block_in_bio = first_block + blocks_per_page - 1;
306+
args->last_block_in_bio = first_block + blocks_per_folio - 1;
310307
out:
311308
return args->bio;
312309

@@ -385,7 +382,7 @@ int mpage_read_folio(struct folio *folio, get_block_t get_block)
385382
{
386383
struct mpage_readpage_args args = {
387384
.folio = folio,
388-
.nr_pages = 1,
385+
.nr_pages = folio_nr_pages(folio),
389386
.get_block = get_block,
390387
};
391388

@@ -456,12 +453,12 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
456453
struct address_space *mapping = folio->mapping;
457454
struct inode *inode = mapping->host;
458455
const unsigned blkbits = inode->i_blkbits;
459-
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
456+
const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
460457
sector_t last_block;
461458
sector_t block_in_file;
462459
sector_t first_block;
463460
unsigned page_block;
464-
unsigned first_unmapped = blocks_per_page;
461+
unsigned first_unmapped = blocks_per_folio;
465462
struct block_device *bdev = NULL;
466463
int boundary = 0;
467464
sector_t boundary_block = 0;
@@ -486,12 +483,12 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
486483
*/
487484
if (buffer_dirty(bh))
488485
goto confused;
489-
if (first_unmapped == blocks_per_page)
486+
if (first_unmapped == blocks_per_folio)
490487
first_unmapped = page_block;
491488
continue;
492489
}
493490

494-
if (first_unmapped != blocks_per_page)
491+
if (first_unmapped != blocks_per_folio)
495492
goto confused; /* hole -> non-hole */
496493

497494
if (!buffer_dirty(bh) || !buffer_uptodate(bh))
@@ -527,7 +524,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
527524
* The page has no buffers: map it to disk
528525
*/
529526
BUG_ON(!folio_test_uptodate(folio));
530-
block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
527+
block_in_file = folio_pos(folio) >> blkbits;
531528
/*
532529
* Whole page beyond EOF? Skip allocating blocks to avoid leaking
533530
* space.
@@ -536,7 +533,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
536533
goto page_is_mapped;
537534
last_block = (i_size - 1) >> blkbits;
538535
map_bh.b_folio = folio;
539-
for (page_block = 0; page_block < blocks_per_page; ) {
536+
for (page_block = 0; page_block < blocks_per_folio; ) {
540537

541538
map_bh.b_state = 0;
542539
map_bh.b_size = 1 << blkbits;
@@ -618,14 +615,14 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
618615
BUG_ON(folio_test_writeback(folio));
619616
folio_start_writeback(folio);
620617
folio_unlock(folio);
621-
if (boundary || (first_unmapped != blocks_per_page)) {
618+
if (boundary || (first_unmapped != blocks_per_folio)) {
622619
bio = mpage_bio_submit_write(bio);
623620
if (boundary_block) {
624621
write_boundary_block(boundary_bdev,
625622
boundary_block, 1 << blkbits);
626623
}
627624
} else {
628-
mpd->last_block_in_bio = first_block + blocks_per_page - 1;
625+
mpd->last_block_in_bio = first_block + blocks_per_folio - 1;
629626
}
630627
goto out;
631628

fs/xfs/xfs_super.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2122,7 +2122,8 @@ static struct file_system_type xfs_fs_type = {
21222122
.init_fs_context = xfs_init_fs_context,
21232123
.parameters = xfs_fs_parameters,
21242124
.kill_sb = xfs_kill_sb,
2125-
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
2125+
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
2126+
FS_LBS,
21262127
};
21272128
MODULE_ALIAS_FS("xfs");
21282129

include/linux/blkdev.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,16 @@ static inline dev_t disk_devt(struct gendisk *disk)
268268
return MKDEV(disk->major, disk->first_minor);
269269
}
270270

271+
/*
272+
* We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER)
273+
* however we constrain this to what we can validate and test.
274+
*/
275+
#define BLK_MAX_BLOCK_SIZE SZ_64K
276+
271277
/* blk_validate_limits() validates bsize, so drivers don't usually need to */
272278
static inline int blk_validate_block_size(unsigned long bsize)
273279
{
274-
if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
280+
if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE || !is_power_of_2(bsize))
275281
return -EINVAL;
276282

277283
return 0;

include/linux/fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2606,6 +2606,7 @@ struct file_system_type {
26062606
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
26072607
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
26082608
#define FS_MGTIME 64 /* FS uses multigrain timestamps */
2609+
#define FS_LBS 128 /* FS supports LBS */
26092610
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
26102611
int (*init_fs_context)(struct fs_context *);
26112612
const struct fs_parameter_spec *parameters;

0 commit comments

Comments
 (0)