Skip to content

Commit d0d7f18

Browse files
committed
Merge remote-tracking branch 'linux-block/block-6.15' into xfs tree
We need two patches inside linux-block tree as dependencies of the patch which will follow this merge. Specifically, we need:

- block: fix race between set_blocksize and read paths
- block: hoist block size validation code to a separate function

Signed-off-by: Carlos Maiolino <[email protected]>
2 parents f0447f8 + f40139f commit d0d7f18

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1443
-648
lines changed

block/bdev.c

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev)
152152
get_order(bsize));
153153
}
154154

155-
int set_blocksize(struct file *file, int size)
155+
/**
156+
* bdev_validate_blocksize - check that this block size is acceptable
157+
* @bdev: blockdevice to check
158+
* @block_size: block size to check
159+
*
160+
* For block device users that do not use buffer heads or the block device
161+
* page cache, make sure that this block size can be used with the device.
162+
*
163+
* Return: On success zero is returned, negative error code on failure.
164+
*/
165+
int bdev_validate_blocksize(struct block_device *bdev, int block_size)
156166
{
157-
struct inode *inode = file->f_mapping->host;
158-
struct block_device *bdev = I_BDEV(inode);
159-
160-
if (blk_validate_block_size(size))
167+
if (blk_validate_block_size(block_size))
161168
return -EINVAL;
162169

163170
/* Size cannot be smaller than the size supported by the device */
164-
if (size < bdev_logical_block_size(bdev))
171+
if (block_size < bdev_logical_block_size(bdev))
165172
return -EINVAL;
166173

174+
return 0;
175+
}
176+
EXPORT_SYMBOL_GPL(bdev_validate_blocksize);
177+
178+
int set_blocksize(struct file *file, int size)
179+
{
180+
struct inode *inode = file->f_mapping->host;
181+
struct block_device *bdev = I_BDEV(inode);
182+
int ret;
183+
184+
ret = bdev_validate_blocksize(bdev, size);
185+
if (ret)
186+
return ret;
187+
167188
if (!file->private_data)
168189
return -EINVAL;
169190

170191
/* Don't change the size if it is same as current */
171192
if (inode->i_blkbits != blksize_bits(size)) {
193+
/*
194+
* Flush and truncate the pagecache before we reconfigure the
195+
* mapping geometry because folio sizes are variable now. If a
196+
* reader has already allocated a folio whose size is smaller
197+
* than the new min_order but invokes readahead after the new
198+
* min_order becomes visible, readahead will think there are
199+
* "zero" blocks per folio and crash. Take the inode and
200+
* invalidation locks to avoid racing with
201+
* read/write/fallocate.
202+
*/
203+
inode_lock(inode);
204+
filemap_invalidate_lock(inode->i_mapping);
205+
172206
sync_blockdev(bdev);
207+
kill_bdev(bdev);
208+
173209
inode->i_blkbits = blksize_bits(size);
174210
mapping_set_folio_min_order(inode->i_mapping, get_order(size));
175211
kill_bdev(bdev);
212+
filemap_invalidate_unlock(inode->i_mapping);
213+
inode_unlock(inode);
176214
}
177215
return 0;
178216
}
@@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part)
777815
blkdev_put_whole(whole);
778816
}
779817

780-
struct block_device *blkdev_get_no_open(dev_t dev)
818+
struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
781819
{
782820
struct block_device *bdev;
783821
struct inode *inode;
784822

785823
inode = ilookup(blockdev_superblock, dev);
786-
if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
824+
if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
787825
blk_request_module(dev);
788826
inode = ilookup(blockdev_superblock, dev);
789827
if (inode)
@@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
10051043
if (ret)
10061044
return ERR_PTR(ret);
10071045

1008-
bdev = blkdev_get_no_open(dev);
1046+
bdev = blkdev_get_no_open(dev, true);
10091047
if (!bdev)
10101048
return ERR_PTR(-ENXIO);
10111049

@@ -1275,18 +1313,15 @@ void sync_bdevs(bool wait)
12751313
void bdev_statx(struct path *path, struct kstat *stat,
12761314
u32 request_mask)
12771315
{
1278-
struct inode *backing_inode;
12791316
struct block_device *bdev;
12801317

1281-
backing_inode = d_backing_inode(path->dentry);
1282-
12831318
/*
1284-
* Note that backing_inode is the inode of a block device node file,
1285-
* not the block device's internal inode. Therefore it is *not* valid
1286-
* to use I_BDEV() here; the block device has to be looked up by i_rdev
1319+
* Note that d_backing_inode() returns the block device node inode, not
1320+
* the block device's internal inode. Therefore it is *not* valid to
1321+
* use I_BDEV() here; the block device has to be looked up by i_rdev
12871322
* instead.
12881323
*/
1289-
bdev = blkdev_get_no_open(backing_inode->i_rdev);
1324+
bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
12901325
if (!bdev)
12911326
return;
12921327

block/bio-integrity.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,16 +66,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
6666
}
6767
EXPORT_SYMBOL(bio_integrity_alloc);
6868

69-
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
70-
bool dirty)
69+
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs)
7170
{
7271
int i;
7372

74-
for (i = 0; i < nr_vecs; i++) {
75-
if (dirty && !PageCompound(bv[i].bv_page))
76-
set_page_dirty_lock(bv[i].bv_page);
73+
for (i = 0; i < nr_vecs; i++)
7774
unpin_user_page(bv[i].bv_page);
78-
}
7975
}
8076

8177
static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
@@ -91,7 +87,7 @@ static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
9187
ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter);
9288
WARN_ON_ONCE(ret != bytes);
9389

94-
bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true);
90+
bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs);
9591
}
9692

9793
/**
@@ -111,8 +107,7 @@ void bio_integrity_unmap_user(struct bio *bio)
111107
return;
112108
}
113109

114-
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
115-
bio_data_dir(bio) == READ);
110+
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt);
116111
}
117112

118113
/**
@@ -198,7 +193,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
198193
}
199194

200195
if (write)
201-
bio_integrity_unpin_bvec(bvec, nr_vecs, false);
196+
bio_integrity_unpin_bvec(bvec, nr_vecs);
202197
else
203198
memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
204199

@@ -319,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
319314
return 0;
320315

321316
release_pages:
322-
bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
317+
bio_integrity_unpin_bvec(bvec, nr_bvecs);
323318
free_bvec:
324319
if (bvec != stack_vec)
325320
kfree(bvec);

block/blk-cgroup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
797797
return -EINVAL;
798798
input = skip_spaces(input);
799799

800-
bdev = blkdev_get_no_open(MKDEV(major, minor));
800+
bdev = blkdev_get_no_open(MKDEV(major, minor), false);
801801
if (!bdev)
802802
return -ENODEV;
803803
if (bdev_is_partition(bdev)) {

block/blk-settings.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
6161
/*
6262
* For read-ahead of large files to be effective, we need to read ahead
6363
* at least twice the optimal I/O size.
64+
*
65+
* There is no hardware limitation for the read-ahead size and the user
66+
* might have increased the read-ahead size through sysfs, so don't ever
67+
* decrease it.
6468
*/
65-
bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
69+
bdi->ra_pages = max3(bdi->ra_pages,
70+
lim->io_opt * 2 / PAGE_SIZE,
71+
VM_READAHEAD_PAGES);
6672
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
6773
}
6874

block/blk-sysfs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,8 @@ int blk_register_queue(struct gendisk *disk)
909909
out_debugfs_remove:
910910
blk_debugfs_remove(disk);
911911
mutex_unlock(&q->sysfs_lock);
912+
if (queue_is_mq(q))
913+
blk_mq_sysfs_unregister(disk);
912914
out_put_queue_kobj:
913915
kobject_put(&disk->queue_kobj);
914916
return ret;

block/blk-throttle.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
12
#ifndef BLK_THROTTLE_H
23
#define BLK_THROTTLE_H
34

block/blk-zoned.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
343343
op = REQ_OP_ZONE_RESET;
344344

345345
/* Invalidate the page cache, including dirty pages. */
346+
inode_lock(bdev->bd_mapping->host);
346347
filemap_invalidate_lock(bdev->bd_mapping);
347348
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
348349
if (ret)
@@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
364365
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
365366

366367
fail:
367-
if (cmd == BLKRESETZONE)
368+
if (cmd == BLKRESETZONE) {
368369
filemap_invalidate_unlock(bdev->bd_mapping);
370+
inode_unlock(bdev->bd_mapping->host);
371+
}
369372

370373
return ret;
371374
}

block/blk.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done)
9494
wait_for_completion_io(done);
9595
}
9696

97+
struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
98+
void blkdev_put_no_open(struct block_device *bdev);
99+
97100
#define BIO_INLINE_VECS 4
98101
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
99102
gfp_t gfp_mask);

block/fops.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
642642
if (ret)
643643
return ret;
644644

645-
bdev = blkdev_get_no_open(inode->i_rdev);
645+
bdev = blkdev_get_no_open(inode->i_rdev, true);
646646
if (!bdev)
647647
return -ENXIO;
648648

@@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
746746
ret = direct_write_fallback(iocb, from, ret,
747747
blkdev_buffered_write(iocb, from));
748748
} else {
749+
/*
750+
* Take i_rwsem and invalidate_lock to avoid racing with
751+
* set_blocksize changing i_blkbits/folio order and punching
752+
* out the pagecache.
753+
*/
754+
inode_lock_shared(bd_inode);
749755
ret = blkdev_buffered_write(iocb, from);
756+
inode_unlock_shared(bd_inode);
750757
}
751758

752759
if (ret > 0)
@@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
757764

758765
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
759766
{
767+
struct inode *bd_inode = bdev_file_inode(iocb->ki_filp);
760768
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
761769
loff_t size = bdev_nr_bytes(bdev);
762770
loff_t pos = iocb->ki_pos;
@@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
793801
goto reexpand;
794802
}
795803

804+
/*
805+
* Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
806+
* changing i_blkbits/folio order and punching out the pagecache.
807+
*/
808+
inode_lock_shared(bd_inode);
796809
ret = filemap_read(iocb, to, ret);
810+
inode_unlock_shared(bd_inode);
797811

798812
reexpand:
799813
if (unlikely(shorted))
@@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
836850
if ((start | len) & (bdev_logical_block_size(bdev) - 1))
837851
return -EINVAL;
838852

853+
inode_lock(inode);
839854
filemap_invalidate_lock(inode->i_mapping);
840855

841856
/*
@@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
868883

869884
fail:
870885
filemap_invalidate_unlock(inode->i_mapping);
886+
inode_unlock(inode);
871887
return error;
872888
}
873889

block/ioctl.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
142142
if (err)
143143
return err;
144144

145+
inode_lock(bdev->bd_mapping->host);
145146
filemap_invalidate_lock(bdev->bd_mapping);
146147
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
147148
if (err)
@@ -174,6 +175,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
174175
blk_finish_plug(&plug);
175176
fail:
176177
filemap_invalidate_unlock(bdev->bd_mapping);
178+
inode_unlock(bdev->bd_mapping->host);
177179
return err;
178180
}
179181

@@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
199201
end > bdev_nr_bytes(bdev))
200202
return -EINVAL;
201203

204+
inode_lock(bdev->bd_mapping->host);
202205
filemap_invalidate_lock(bdev->bd_mapping);
203206
err = truncate_bdev_range(bdev, mode, start, end - 1);
204207
if (!err)
205208
err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
206209
GFP_KERNEL);
207210
filemap_invalidate_unlock(bdev->bd_mapping);
211+
inode_unlock(bdev->bd_mapping->host);
208212
return err;
209213
}
210214

@@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
236240
return -EINVAL;
237241

238242
/* Invalidate the page cache, including dirty pages */
243+
inode_lock(bdev->bd_mapping->host);
239244
filemap_invalidate_lock(bdev->bd_mapping);
240245
err = truncate_bdev_range(bdev, mode, start, end);
241246
if (err)
@@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
246251

247252
fail:
248253
filemap_invalidate_unlock(bdev->bd_mapping);
254+
inode_unlock(bdev->bd_mapping->host);
249255
return err;
250256
}
251257

0 commit comments

Comments
 (0)