Skip to content

Commit 284dba6

Browse files
jankaragregkh
authored andcommitted
blktrace: Protect q->blk_trace with RCU
commit c780e86 upstream. KASAN is reporting that __blk_add_trace() has a use-after-free issue when accessing q->blk_trace. Indeed the switching of block tracing (and thus eventual freeing of q->blk_trace) is completely unsynchronized with the currently running tracing and thus it can happen that the blk_trace structure is being freed just while __blk_add_trace() works on it. Protect accesses to q->blk_trace by RCU during tracing and make sure we wait for the end of RCU grace period when shutting down tracing. Luckily that is rare enough event that we can afford that. Note that postponing the freeing of blk_trace to an RCU callback should better be avoided as it could have unexpected user visible side-effects as debugfs files would be still existing for a short while block tracing has been shut down. Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711 CC: [email protected] Reviewed-by: Chaitanya Kulkarni <[email protected]> Reviewed-by: Ming Lei <[email protected]> Tested-by: Ming Lei <[email protected]> Reviewed-by: Bart Van Assche <[email protected]> Reported-by: Tristan Madani <[email protected]> Signed-off-by: Jan Kara <[email protected]> Signed-off-by: Jens Axboe <[email protected]> [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent 0f62edb commit 284dba6

File tree

3 files changed

+94
-36
lines changed

3 files changed

+94
-36
lines changed

include/linux/blkdev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ struct request_queue {
445445
unsigned int sg_reserved_size;
446446
int node;
447447
#ifdef CONFIG_BLK_DEV_IO_TRACE
448-
struct blk_trace *blk_trace;
448+
struct blk_trace __rcu *blk_trace;
449449
struct mutex blk_trace_mutex;
450450
#endif
451451
/*

include/linux/blktrace_api.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,26 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...);
5151
**/
5252
#define blk_add_trace_msg(q, fmt, ...) \
5353
do { \
54-
struct blk_trace *bt = (q)->blk_trace; \
54+
struct blk_trace *bt; \
55+
\
56+
rcu_read_lock(); \
57+
bt = rcu_dereference((q)->blk_trace); \
5558
if (unlikely(bt)) \
5659
__trace_note_message(bt, fmt, ##__VA_ARGS__); \
60+
rcu_read_unlock(); \
5761
} while (0)
5862
#define BLK_TN_MAX_MSG 128
5963

6064
static inline bool blk_trace_note_message_enabled(struct request_queue *q)
6165
{
62-
struct blk_trace *bt = q->blk_trace;
63-
if (likely(!bt))
64-
return false;
65-
return bt->act_mask & BLK_TC_NOTIFY;
66+
struct blk_trace *bt;
67+
bool ret;
68+
69+
rcu_read_lock();
70+
bt = rcu_dereference(q->blk_trace);
71+
ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
72+
rcu_read_unlock();
73+
return ret;
6674
}
6775

6876
extern void blk_add_driver_data(struct request_queue *q, struct request *rq,

kernel/trace/blktrace.c

Lines changed: 80 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ static void put_probe_ref(void)
325325

326326
static void blk_trace_cleanup(struct blk_trace *bt)
327327
{
328+
synchronize_rcu();
328329
blk_trace_free(bt);
329330
put_probe_ref();
330331
}
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
629630
static int __blk_trace_startstop(struct request_queue *q, int start)
630631
{
631632
int ret;
632-
struct blk_trace *bt = q->blk_trace;
633+
struct blk_trace *bt;
633634

635+
bt = rcu_dereference_protected(q->blk_trace,
636+
lockdep_is_held(&q->blk_trace_mutex));
634637
if (bt == NULL)
635638
return -EINVAL;
636639

@@ -739,8 +742,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
739742
void blk_trace_shutdown(struct request_queue *q)
740743
{
741744
mutex_lock(&q->blk_trace_mutex);
742-
743-
if (q->blk_trace) {
745+
if (rcu_dereference_protected(q->blk_trace,
746+
lockdep_is_held(&q->blk_trace_mutex))) {
744747
__blk_trace_startstop(q, 0);
745748
__blk_trace_remove(q);
746749
}
@@ -766,10 +769,14 @@ void blk_trace_shutdown(struct request_queue *q)
766769
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
767770
unsigned int nr_bytes, u32 what)
768771
{
769-
struct blk_trace *bt = q->blk_trace;
772+
struct blk_trace *bt;
770773

771-
if (likely(!bt))
774+
rcu_read_lock();
775+
bt = rcu_dereference(q->blk_trace);
776+
if (likely(!bt)) {
777+
rcu_read_unlock();
772778
return;
779+
}
773780

774781
if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
775782
what |= BLK_TC_ACT(BLK_TC_PC);
@@ -780,6 +787,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
780787
__blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, req_op(rq),
781788
rq->cmd_flags, what, rq->errors, 0, NULL);
782789
}
790+
rcu_read_unlock();
783791
}
784792

785793
static void blk_add_trace_rq_abort(void *ignore,
@@ -829,13 +837,18 @@ static void blk_add_trace_rq_complete(void *ignore,
829837
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
830838
u32 what, int error)
831839
{
832-
struct blk_trace *bt = q->blk_trace;
840+
struct blk_trace *bt;
833841

834-
if (likely(!bt))
842+
rcu_read_lock();
843+
bt = rcu_dereference(q->blk_trace);
844+
if (likely(!bt)) {
845+
rcu_read_unlock();
835846
return;
847+
}
836848

837849
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
838850
bio_op(bio), bio->bi_opf, what, error, 0, NULL);
851+
rcu_read_unlock();
839852
}
840853

841854
static void blk_add_trace_bio_bounce(void *ignore,
@@ -880,11 +893,14 @@ static void blk_add_trace_getrq(void *ignore,
880893
if (bio)
881894
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
882895
else {
883-
struct blk_trace *bt = q->blk_trace;
896+
struct blk_trace *bt;
884897

898+
rcu_read_lock();
899+
bt = rcu_dereference(q->blk_trace);
885900
if (bt)
886901
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
887902
NULL);
903+
rcu_read_unlock();
888904
}
889905
}
890906

@@ -896,27 +912,35 @@ static void blk_add_trace_sleeprq(void *ignore,
896912
if (bio)
897913
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
898914
else {
899-
struct blk_trace *bt = q->blk_trace;
915+
struct blk_trace *bt;
900916

917+
rcu_read_lock();
918+
bt = rcu_dereference(q->blk_trace);
901919
if (bt)
902920
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
903921
0, 0, NULL);
922+
rcu_read_unlock();
904923
}
905924
}
906925

907926
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
908927
{
909-
struct blk_trace *bt = q->blk_trace;
928+
struct blk_trace *bt;
910929

930+
rcu_read_lock();
931+
bt = rcu_dereference(q->blk_trace);
911932
if (bt)
912933
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
934+
rcu_read_unlock();
913935
}
914936

915937
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
916938
unsigned int depth, bool explicit)
917939
{
918-
struct blk_trace *bt = q->blk_trace;
940+
struct blk_trace *bt;
919941

942+
rcu_read_lock();
943+
bt = rcu_dereference(q->blk_trace);
920944
if (bt) {
921945
__be64 rpdu = cpu_to_be64(depth);
922946
u32 what;
@@ -928,14 +952,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
928952

929953
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
930954
}
955+
rcu_read_unlock();
931956
}
932957

933958
static void blk_add_trace_split(void *ignore,
934959
struct request_queue *q, struct bio *bio,
935960
unsigned int pdu)
936961
{
937-
struct blk_trace *bt = q->blk_trace;
962+
struct blk_trace *bt;
938963

964+
rcu_read_lock();
965+
bt = rcu_dereference(q->blk_trace);
939966
if (bt) {
940967
__be64 rpdu = cpu_to_be64(pdu);
941968

@@ -944,6 +971,7 @@ static void blk_add_trace_split(void *ignore,
944971
BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
945972
&rpdu);
946973
}
974+
rcu_read_unlock();
947975
}
948976

949977
/**
@@ -963,11 +991,15 @@ static void blk_add_trace_bio_remap(void *ignore,
963991
struct request_queue *q, struct bio *bio,
964992
dev_t dev, sector_t from)
965993
{
966-
struct blk_trace *bt = q->blk_trace;
994+
struct blk_trace *bt;
967995
struct blk_io_trace_remap r;
968996

969-
if (likely(!bt))
997+
rcu_read_lock();
998+
bt = rcu_dereference(q->blk_trace);
999+
if (likely(!bt)) {
1000+
rcu_read_unlock();
9701001
return;
1002+
}
9711003

9721004
r.device_from = cpu_to_be32(dev);
9731005
r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
@@ -976,6 +1008,7 @@ static void blk_add_trace_bio_remap(void *ignore,
9761008
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
9771009
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
9781010
sizeof(r), &r);
1011+
rcu_read_unlock();
9791012
}
9801013

9811014
/**
@@ -996,11 +1029,15 @@ static void blk_add_trace_rq_remap(void *ignore,
9961029
struct request *rq, dev_t dev,
9971030
sector_t from)
9981031
{
999-
struct blk_trace *bt = q->blk_trace;
1032+
struct blk_trace *bt;
10001033
struct blk_io_trace_remap r;
10011034

1002-
if (likely(!bt))
1035+
rcu_read_lock();
1036+
bt = rcu_dereference(q->blk_trace);
1037+
if (likely(!bt)) {
1038+
rcu_read_unlock();
10031039
return;
1040+
}
10041041

10051042
r.device_from = cpu_to_be32(dev);
10061043
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1009,6 +1046,7 @@ static void blk_add_trace_rq_remap(void *ignore,
10091046
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
10101047
rq_data_dir(rq), 0, BLK_TA_REMAP, !!rq->errors,
10111048
sizeof(r), &r);
1049+
rcu_read_unlock();
10121050
}
10131051

10141052
/**
@@ -1026,17 +1064,22 @@ void blk_add_driver_data(struct request_queue *q,
10261064
struct request *rq,
10271065
void *data, size_t len)
10281066
{
1029-
struct blk_trace *bt = q->blk_trace;
1067+
struct blk_trace *bt;
10301068

1031-
if (likely(!bt))
1069+
rcu_read_lock();
1070+
bt = rcu_dereference(q->blk_trace);
1071+
if (likely(!bt)) {
1072+
rcu_read_unlock();
10321073
return;
1074+
}
10331075

10341076
if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
10351077
__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 0,
10361078
BLK_TA_DRV_DATA, rq->errors, len, data);
10371079
else
10381080
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, 0,
10391081
BLK_TA_DRV_DATA, rq->errors, len, data);
1082+
rcu_read_unlock();
10401083
}
10411084
EXPORT_SYMBOL_GPL(blk_add_driver_data);
10421085

@@ -1529,6 +1572,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
15291572
return -EINVAL;
15301573

15311574
put_probe_ref();
1575+
synchronize_rcu();
15321576
blk_trace_free(bt);
15331577
return 0;
15341578
}
@@ -1690,6 +1734,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
16901734
struct hd_struct *p = dev_to_part(dev);
16911735
struct request_queue *q;
16921736
struct block_device *bdev;
1737+
struct blk_trace *bt;
16931738
ssize_t ret = -ENXIO;
16941739

16951740
bdev = bdget(part_devt(p));
@@ -1702,21 +1747,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
17021747

17031748
mutex_lock(&q->blk_trace_mutex);
17041749

1750+
bt = rcu_dereference_protected(q->blk_trace,
1751+
lockdep_is_held(&q->blk_trace_mutex));
17051752
if (attr == &dev_attr_enable) {
1706-
ret = sprintf(buf, "%u\n", !!q->blk_trace);
1753+
ret = sprintf(buf, "%u\n", !!bt);
17071754
goto out_unlock_bdev;
17081755
}
17091756

1710-
if (q->blk_trace == NULL)
1757+
if (bt == NULL)
17111758
ret = sprintf(buf, "disabled\n");
17121759
else if (attr == &dev_attr_act_mask)
1713-
ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1760+
ret = blk_trace_mask2str(buf, bt->act_mask);
17141761
else if (attr == &dev_attr_pid)
1715-
ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1762+
ret = sprintf(buf, "%u\n", bt->pid);
17161763
else if (attr == &dev_attr_start_lba)
1717-
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1764+
ret = sprintf(buf, "%llu\n", bt->start_lba);
17181765
else if (attr == &dev_attr_end_lba)
1719-
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1766+
ret = sprintf(buf, "%llu\n", bt->end_lba);
17201767

17211768
out_unlock_bdev:
17221769
mutex_unlock(&q->blk_trace_mutex);
@@ -1733,6 +1780,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
17331780
struct block_device *bdev;
17341781
struct request_queue *q;
17351782
struct hd_struct *p;
1783+
struct blk_trace *bt;
17361784
u64 value;
17371785
ssize_t ret = -EINVAL;
17381786

@@ -1763,8 +1811,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
17631811

17641812
mutex_lock(&q->blk_trace_mutex);
17651813

1814+
bt = rcu_dereference_protected(q->blk_trace,
1815+
lockdep_is_held(&q->blk_trace_mutex));
17661816
if (attr == &dev_attr_enable) {
1767-
if (!!value == !!q->blk_trace) {
1817+
if (!!value == !!bt) {
17681818
ret = 0;
17691819
goto out_unlock_bdev;
17701820
}
@@ -1776,18 +1826,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
17761826
}
17771827

17781828
ret = 0;
1779-
if (q->blk_trace == NULL)
1829+
if (bt == NULL)
17801830
ret = blk_trace_setup_queue(q, bdev);
17811831

17821832
if (ret == 0) {
17831833
if (attr == &dev_attr_act_mask)
1784-
q->blk_trace->act_mask = value;
1834+
bt->act_mask = value;
17851835
else if (attr == &dev_attr_pid)
1786-
q->blk_trace->pid = value;
1836+
bt->pid = value;
17871837
else if (attr == &dev_attr_start_lba)
1788-
q->blk_trace->start_lba = value;
1838+
bt->start_lba = value;
17891839
else if (attr == &dev_attr_end_lba)
1790-
q->blk_trace->end_lba = value;
1840+
bt->end_lba = value;
17911841
}
17921842

17931843
out_unlock_bdev:

0 commit comments

Comments
 (0)