Skip to content

Commit c780e86

Browse files
jankara authored and axboe committed
blktrace: Protect q->blk_trace with RCU
KASAN is reporting that __blk_add_trace() has a use-after-free issue when accessing q->blk_trace. Indeed the switching of block tracing (and thus eventual freeing of q->blk_trace) is completely unsynchronized with the currently running tracing and thus it can happen that the blk_trace structure is being freed just while __blk_add_trace() works on it. Protect accesses to q->blk_trace by RCU during tracing and make sure we wait for the end of RCU grace period when shutting down tracing. Luckily that is rare enough event that we can afford that. Note that postponing the freeing of blk_trace to an RCU callback should better be avoided as it could have unexpected user visible side-effects as debugfs files would be still existing for a short while block tracing has been shut down. Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711 CC: [email protected] Reviewed-by: Chaitanya Kulkarni <[email protected]> Reviewed-by: Ming Lei <[email protected]> Tested-by: Ming Lei <[email protected]> Reviewed-by: Bart Van Assche <[email protected]> Reported-by: Tristan Madani <[email protected]> Signed-off-by: Jan Kara <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 01e99ae commit c780e86

File tree

3 files changed

+97
-37
lines changed

3 files changed

+97
-37
lines changed

include/linux/blkdev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ struct request_queue {
524524
unsigned int sg_reserved_size;
525525
int node;
526526
#ifdef CONFIG_BLK_DEV_IO_TRACE
527-
struct blk_trace *blk_trace;
527+
struct blk_trace __rcu *blk_trace;
528528
struct mutex blk_trace_mutex;
529529
#endif
530530
/*

include/linux/blktrace_api.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,28 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
5151
**/
5252
#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \
5353
do { \
54-
struct blk_trace *bt = (q)->blk_trace; \
54+
struct blk_trace *bt; \
55+
\
56+
rcu_read_lock(); \
57+
bt = rcu_dereference((q)->blk_trace); \
5558
if (unlikely(bt)) \
5659
__trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
60+
rcu_read_unlock(); \
5761
} while (0)
5862
#define blk_add_trace_msg(q, fmt, ...) \
5963
blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
6064
#define BLK_TN_MAX_MSG 128
6165

6266
static inline bool blk_trace_note_message_enabled(struct request_queue *q)
6367
{
64-
struct blk_trace *bt = q->blk_trace;
65-
if (likely(!bt))
66-
return false;
67-
return bt->act_mask & BLK_TC_NOTIFY;
68+
struct blk_trace *bt;
69+
bool ret;
70+
71+
rcu_read_lock();
72+
bt = rcu_dereference(q->blk_trace);
73+
ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
74+
rcu_read_unlock();
75+
return ret;
6876
}
6977

7078
extern void blk_add_driver_data(struct request_queue *q, struct request *rq,

kernel/trace/blktrace.c

Lines changed: 83 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ static void put_probe_ref(void)
335335

336336
static void blk_trace_cleanup(struct blk_trace *bt)
337337
{
338+
synchronize_rcu();
338339
blk_trace_free(bt);
339340
put_probe_ref();
340341
}
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
629630
static int __blk_trace_startstop(struct request_queue *q, int start)
630631
{
631632
int ret;
632-
struct blk_trace *bt = q->blk_trace;
633+
struct blk_trace *bt;
633634

635+
bt = rcu_dereference_protected(q->blk_trace,
636+
lockdep_is_held(&q->blk_trace_mutex));
634637
if (bt == NULL)
635638
return -EINVAL;
636639

@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
740743
void blk_trace_shutdown(struct request_queue *q)
741744
{
742745
mutex_lock(&q->blk_trace_mutex);
743-
744-
if (q->blk_trace) {
746+
if (rcu_dereference_protected(q->blk_trace,
747+
lockdep_is_held(&q->blk_trace_mutex))) {
745748
__blk_trace_startstop(q, 0);
746749
__blk_trace_remove(q);
747750
}
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
752755
#ifdef CONFIG_BLK_CGROUP
753756
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
754757
{
755-
struct blk_trace *bt = q->blk_trace;
758+
struct blk_trace *bt;
756759

760+
/* We don't use the 'bt' value here except as an optimization... */
761+
bt = rcu_dereference_protected(q->blk_trace, 1);
757762
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
758763
return 0;
759764

@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
796801
static void blk_add_trace_rq(struct request *rq, int error,
797802
unsigned int nr_bytes, u32 what, u64 cgid)
798803
{
799-
struct blk_trace *bt = rq->q->blk_trace;
804+
struct blk_trace *bt;
800805

801-
if (likely(!bt))
806+
rcu_read_lock();
807+
bt = rcu_dereference(rq->q->blk_trace);
808+
if (likely(!bt)) {
809+
rcu_read_unlock();
802810
return;
811+
}
803812

804813
if (blk_rq_is_passthrough(rq))
805814
what |= BLK_TC_ACT(BLK_TC_PC);
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
808817

809818
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
810819
rq->cmd_flags, what, error, 0, NULL, cgid);
820+
rcu_read_unlock();
811821
}
812822

813823
static void blk_add_trace_rq_insert(void *ignore,
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
853863
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
854864
u32 what, int error)
855865
{
856-
struct blk_trace *bt = q->blk_trace;
866+
struct blk_trace *bt;
857867

858-
if (likely(!bt))
868+
rcu_read_lock();
869+
bt = rcu_dereference(q->blk_trace);
870+
if (likely(!bt)) {
871+
rcu_read_unlock();
859872
return;
873+
}
860874

861875
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
862876
bio_op(bio), bio->bi_opf, what, error, 0, NULL,
863877
blk_trace_bio_get_cgid(q, bio));
878+
rcu_read_unlock();
864879
}
865880

866881
static void blk_add_trace_bio_bounce(void *ignore,
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
905920
if (bio)
906921
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
907922
else {
908-
struct blk_trace *bt = q->blk_trace;
923+
struct blk_trace *bt;
909924

925+
rcu_read_lock();
926+
bt = rcu_dereference(q->blk_trace);
910927
if (bt)
911928
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
912929
NULL, 0);
930+
rcu_read_unlock();
913931
}
914932
}
915933

@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
921939
if (bio)
922940
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
923941
else {
924-
struct blk_trace *bt = q->blk_trace;
942+
struct blk_trace *bt;
925943

944+
rcu_read_lock();
945+
bt = rcu_dereference(q->blk_trace);
926946
if (bt)
927947
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
928948
0, 0, NULL, 0);
949+
rcu_read_unlock();
929950
}
930951
}
931952

932953
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
933954
{
934-
struct blk_trace *bt = q->blk_trace;
955+
struct blk_trace *bt;
935956

957+
rcu_read_lock();
958+
bt = rcu_dereference(q->blk_trace);
936959
if (bt)
937960
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
961+
rcu_read_unlock();
938962
}
939963

940964
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
941965
unsigned int depth, bool explicit)
942966
{
943-
struct blk_trace *bt = q->blk_trace;
967+
struct blk_trace *bt;
944968

969+
rcu_read_lock();
970+
bt = rcu_dereference(q->blk_trace);
945971
if (bt) {
946972
__be64 rpdu = cpu_to_be64(depth);
947973
u32 what;
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
953979

954980
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
955981
}
982+
rcu_read_unlock();
956983
}
957984

958985
static void blk_add_trace_split(void *ignore,
959986
struct request_queue *q, struct bio *bio,
960987
unsigned int pdu)
961988
{
962-
struct blk_trace *bt = q->blk_trace;
989+
struct blk_trace *bt;
963990

991+
rcu_read_lock();
992+
bt = rcu_dereference(q->blk_trace);
964993
if (bt) {
965994
__be64 rpdu = cpu_to_be64(pdu);
966995

@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
969998
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
970999
&rpdu, blk_trace_bio_get_cgid(q, bio));
9711000
}
1001+
rcu_read_unlock();
9721002
}
9731003

9741004
/**
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
9881018
struct request_queue *q, struct bio *bio,
9891019
dev_t dev, sector_t from)
9901020
{
991-
struct blk_trace *bt = q->blk_trace;
1021+
struct blk_trace *bt;
9921022
struct blk_io_trace_remap r;
9931023

994-
if (likely(!bt))
1024+
rcu_read_lock();
1025+
bt = rcu_dereference(q->blk_trace);
1026+
if (likely(!bt)) {
1027+
rcu_read_unlock();
9951028
return;
1029+
}
9961030

9971031
r.device_from = cpu_to_be32(dev);
9981032
r.device_to = cpu_to_be32(bio_dev(bio));
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
10011035
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
10021036
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
10031037
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
1038+
rcu_read_unlock();
10041039
}
10051040

10061041
/**
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
10211056
struct request *rq, dev_t dev,
10221057
sector_t from)
10231058
{
1024-
struct blk_trace *bt = q->blk_trace;
1059+
struct blk_trace *bt;
10251060
struct blk_io_trace_remap r;
10261061

1027-
if (likely(!bt))
1062+
rcu_read_lock();
1063+
bt = rcu_dereference(q->blk_trace);
1064+
if (likely(!bt)) {
1065+
rcu_read_unlock();
10281066
return;
1067+
}
10291068

10301069
r.device_from = cpu_to_be32(dev);
10311070
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
10341073
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
10351074
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
10361075
sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
1076+
rcu_read_unlock();
10371077
}
10381078

10391079
/**
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
10511091
struct request *rq,
10521092
void *data, size_t len)
10531093
{
1054-
struct blk_trace *bt = q->blk_trace;
1094+
struct blk_trace *bt;
10551095

1056-
if (likely(!bt))
1096+
rcu_read_lock();
1097+
bt = rcu_dereference(q->blk_trace);
1098+
if (likely(!bt)) {
1099+
rcu_read_unlock();
10571100
return;
1101+
}
10581102

10591103
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
10601104
BLK_TA_DRV_DATA, 0, len, data,
10611105
blk_trace_request_get_cgid(q, rq));
1106+
rcu_read_unlock();
10621107
}
10631108
EXPORT_SYMBOL_GPL(blk_add_driver_data);
10641109

@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
15971642
return -EINVAL;
15981643

15991644
put_probe_ref();
1645+
synchronize_rcu();
16001646
blk_trace_free(bt);
16011647
return 0;
16021648
}
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
17581804
struct hd_struct *p = dev_to_part(dev);
17591805
struct request_queue *q;
17601806
struct block_device *bdev;
1807+
struct blk_trace *bt;
17611808
ssize_t ret = -ENXIO;
17621809

17631810
bdev = bdget(part_devt(p));
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
17701817

17711818
mutex_lock(&q->blk_trace_mutex);
17721819

1820+
bt = rcu_dereference_protected(q->blk_trace,
1821+
lockdep_is_held(&q->blk_trace_mutex));
17731822
if (attr == &dev_attr_enable) {
1774-
ret = sprintf(buf, "%u\n", !!q->blk_trace);
1823+
ret = sprintf(buf, "%u\n", !!bt);
17751824
goto out_unlock_bdev;
17761825
}
17771826

1778-
if (q->blk_trace == NULL)
1827+
if (bt == NULL)
17791828
ret = sprintf(buf, "disabled\n");
17801829
else if (attr == &dev_attr_act_mask)
1781-
ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1830+
ret = blk_trace_mask2str(buf, bt->act_mask);
17821831
else if (attr == &dev_attr_pid)
1783-
ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1832+
ret = sprintf(buf, "%u\n", bt->pid);
17841833
else if (attr == &dev_attr_start_lba)
1785-
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1834+
ret = sprintf(buf, "%llu\n", bt->start_lba);
17861835
else if (attr == &dev_attr_end_lba)
1787-
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1836+
ret = sprintf(buf, "%llu\n", bt->end_lba);
17881837

17891838
out_unlock_bdev:
17901839
mutex_unlock(&q->blk_trace_mutex);
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
18011850
struct block_device *bdev;
18021851
struct request_queue *q;
18031852
struct hd_struct *p;
1853+
struct blk_trace *bt;
18041854
u64 value;
18051855
ssize_t ret = -EINVAL;
18061856

@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
18311881

18321882
mutex_lock(&q->blk_trace_mutex);
18331883

1884+
bt = rcu_dereference_protected(q->blk_trace,
1885+
lockdep_is_held(&q->blk_trace_mutex));
18341886
if (attr == &dev_attr_enable) {
1835-
if (!!value == !!q->blk_trace) {
1887+
if (!!value == !!bt) {
18361888
ret = 0;
18371889
goto out_unlock_bdev;
18381890
}
@@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
18441896
}
18451897

18461898
ret = 0;
1847-
if (q->blk_trace == NULL)
1899+
if (bt == NULL)
18481900
ret = blk_trace_setup_queue(q, bdev);
18491901

18501902
if (ret == 0) {
18511903
if (attr == &dev_attr_act_mask)
1852-
q->blk_trace->act_mask = value;
1904+
bt->act_mask = value;
18531905
else if (attr == &dev_attr_pid)
1854-
q->blk_trace->pid = value;
1906+
bt->pid = value;
18551907
else if (attr == &dev_attr_start_lba)
1856-
q->blk_trace->start_lba = value;
1908+
bt->start_lba = value;
18571909
else if (attr == &dev_attr_end_lba)
1858-
q->blk_trace->end_lba = value;
1910+
bt->end_lba = value;
18591911
}
18601912

18611913
out_unlock_bdev:

0 commit comments

Comments (0)