Skip to content

Commit b488092

Browse files
Waiman-Longgregkh
authored andcommitted
blktrace: Fix potential deadlock between delete & sysfs ops
commit 5acb3cc upstream. The lockdep code had reported the following unsafe locking scenario: CPU0 CPU1 ---- ---- lock(s_active#228); lock(&bdev->bd_mutex/1); lock(s_active#228); lock(&bdev->bd_mutex); *** DEADLOCK *** The deadlock may happen when one task (CPU1) is trying to delete a partition in a block device and another task (CPU0) is accessing tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that partition. The s_active isn't an actual lock. It is a reference count (kn->count) on the sysfs (kernfs) file. Removal of a sysfs file, however, require a wait until all the references are gone. The reference count is treated like a rwsem using lockdep instrumentation code. The fact that a thread is in the sysfs callback method or in the ioctl call means there is a reference to the opended sysfs or device file. That should prevent the underlying block structure from being removed. Instead of using bd_mutex in the block_device structure, a new blk_trace_mutex is now added to the request_queue structure to protect access to the blk_trace structure. Suggested-by: Christoph Hellwig <[email protected]> Signed-off-by: Waiman Long <[email protected]> Acked-by: Steven Rostedt (VMware) <[email protected]> Fix typo in patch subject line, and prune a comment detailing how the code used to work. Signed-off-by: Jens Axboe <[email protected]> Signed-off-by: Ben Hutchings <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent badbe56 commit b488092

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

block/blk-core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
729729

730730
kobject_init(&q->kobj, &blk_queue_ktype);
731731

732+
#ifdef CONFIG_BLK_DEV_IO_TRACE
733+
mutex_init(&q->blk_trace_mutex);
734+
#endif
732735
mutex_init(&q->sysfs_lock);
733736
spin_lock_init(&q->__queue_lock);
734737

include/linux/blkdev.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ struct request_queue {
446446
int node;
447447
#ifdef CONFIG_BLK_DEV_IO_TRACE
448448
struct blk_trace *blk_trace;
449+
struct mutex blk_trace_mutex;
449450
#endif
450451
/*
451452
* for flush operations

kernel/trace/blktrace.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,12 @@ int blk_trace_startstop(struct request_queue *q, int start)
644644
}
645645
EXPORT_SYMBOL_GPL(blk_trace_startstop);
646646

647+
/*
648+
* When reading or writing the blktrace sysfs files, the references to the
649+
* opened sysfs or device files should prevent the underlying block device
650+
* from being removed. So no further delete protection is really needed.
651+
*/
652+
647653
/**
648654
* blk_trace_ioctl: - handle the ioctls associated with tracing
649655
* @bdev: the block device
@@ -661,7 +667,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
661667
if (!q)
662668
return -ENXIO;
663669

664-
mutex_lock(&bdev->bd_mutex);
670+
mutex_lock(&q->blk_trace_mutex);
665671

666672
switch (cmd) {
667673
case BLKTRACESETUP:
@@ -687,7 +693,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
687693
break;
688694
}
689695

690-
mutex_unlock(&bdev->bd_mutex);
696+
mutex_unlock(&q->blk_trace_mutex);
691697
return ret;
692698
}
693699

@@ -1656,7 +1662,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
16561662
if (q == NULL)
16571663
goto out_bdput;
16581664

1659-
mutex_lock(&bdev->bd_mutex);
1665+
mutex_lock(&q->blk_trace_mutex);
16601666

16611667
if (attr == &dev_attr_enable) {
16621668
ret = sprintf(buf, "%u\n", !!q->blk_trace);
@@ -1675,7 +1681,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
16751681
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
16761682

16771683
out_unlock_bdev:
1678-
mutex_unlock(&bdev->bd_mutex);
1684+
mutex_unlock(&q->blk_trace_mutex);
16791685
out_bdput:
16801686
bdput(bdev);
16811687
out:
@@ -1717,7 +1723,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
17171723
if (q == NULL)
17181724
goto out_bdput;
17191725

1720-
mutex_lock(&bdev->bd_mutex);
1726+
mutex_lock(&q->blk_trace_mutex);
17211727

17221728
if (attr == &dev_attr_enable) {
17231729
if (!!value == !!q->blk_trace) {
@@ -1747,7 +1753,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
17471753
}
17481754

17491755
out_unlock_bdev:
1750-
mutex_unlock(&bdev->bd_mutex);
1756+
mutex_unlock(&q->blk_trace_mutex);
17511757
out_bdput:
17521758
bdput(bdev);
17531759
out:

0 commit comments

Comments
 (0)