Skip to content

Commit a449bf5

Browse files
Qian Caitorvalds
authored andcommitted
mm/swapfile: fix and annotate various data races
swap_info_struct si.highest_bit, si.swap_map[offset] and si.flags could be accessed concurrently separately as noticed by KCSAN, === si.highest_bit === write to 0xffff8d5abccdc4d4 of 4 bytes by task 5353 on cpu 24: swap_range_alloc+0x81/0x130 swap_range_alloc at mm/swapfile.c:681 scan_swap_map_slots+0x371/0xb90 get_swap_pages+0x39d/0x5c0 get_swap_page+0xf2/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffff8d5abccdc4d4 of 4 bytes by task 6672 on cpu 70: scan_swap_map_slots+0x4a6/0xb90 scan_swap_map_slots at mm/swapfile.c:892 get_swap_pages+0x39d/0x5c0 get_swap_page+0xf2/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 Reported by Kernel Concurrency Sanitizer on: CPU: 70 PID: 6672 Comm: oom01 Tainted: G W L 5.5.0-next-20200205+ #3 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 === si.swap_map[offset] === write to 0xffffbc370c29a64c of 1 bytes by task 6856 on cpu 86: __swap_entry_free_locked+0x8c/0x100 __swap_entry_free_locked at mm/swapfile.c:1209 (discriminator 4) __swap_entry_free.constprop.20+0x69/0xb0 free_swap_and_cache+0x53/0xa0 unmap_page_range+0x7f8/0x1d70 unmap_single_vma+0xcd/0x170 unmap_vmas+0x18b/0x220 exit_mmap+0xee/0x220 mmput+0x10e/0x270 do_exit+0x59b/0xf40 do_group_exit+0x8b/0x180 read to 0xffffbc370c29a64c of 1 bytes by task 6855 on cpu 20: _swap_info_get+0x81/0xa0 _swap_info_get at mm/swapfile.c:1140 free_swap_and_cache+0x40/0xa0 unmap_page_range+0x7f8/0x1d70 unmap_single_vma+0xcd/0x170 unmap_vmas+0x18b/0x220 exit_mmap+0xee/0x220 mmput+0x10e/0x270 do_exit+0x59b/0xf40 do_group_exit+0x8b/0x180 === si.flags === write to 0xffff956c8fc6c400 of 8 bytes by task 6087 on cpu 23: scan_swap_map_slots+0x6fe/0xb50 scan_swap_map_slots at mm/swapfile.c:887 get_swap_pages+0x39d/0x5c0 get_swap_page+0x377/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffff956c8fc6c400 of 8 bytes by task 6207 on cpu 63: _swap_info_get+0x41/0xa0 __swap_info_get at mm/swapfile.c:1114 put_swap_page+0x84/0x490 __remove_mapping+0x384/0x5f0 shrink_page_list+0xff1/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 The writes are under si->lock but the reads are not. For si.highest_bit and si.swap_map[offset], data race could trigger logic bugs, so fix them by having WRITE_ONCE() for the writes and READ_ONCE() for the reads except those isolated reads where they compare against zero which a data race would cause no harm. Thus, annotate them as intentional data races using the data_race() macro. For si.flags, the readers are only interested in a single bit where a data race there would cause no issue there. [[email protected]: add a missing annotation for si->flags in memory.c] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Qian Cai <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Cc: Marco Elver <[email protected]> Cc: Hugh Dickins <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
1 parent e630bfa commit a449bf5

File tree

2 files changed

+20
-15
lines changed

2 files changed

+20
-15
lines changed

mm/memory.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3130,8 +3130,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
31303130
if (!page) {
31313131
struct swap_info_struct *si = swp_swap_info(entry);
31323132

3133-
if (si->flags & SWP_SYNCHRONOUS_IO &&
3134-
__swap_count(entry) == 1) {
3133+
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
3134+
__swap_count(entry) == 1) {
31353135
/* skip swapcache */
31363136
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
31373137
vmf->address);

mm/swapfile.c

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
672672
if (offset == si->lowest_bit)
673673
si->lowest_bit += nr_entries;
674674
if (end == si->highest_bit)
675-
si->highest_bit -= nr_entries;
675+
WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries);
676676
si->inuse_pages += nr_entries;
677677
if (si->inuse_pages == si->pages) {
678678
si->lowest_bit = si->max;
@@ -705,7 +705,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
705705
if (end > si->highest_bit) {
706706
bool was_full = !si->highest_bit;
707707

708-
si->highest_bit = end;
708+
WRITE_ONCE(si->highest_bit, end);
709709
if (was_full && (si->flags & SWP_WRITEOK))
710710
add_to_avail_list(si);
711711
}
@@ -870,7 +870,7 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
870870
else
871871
goto done;
872872
}
873-
si->swap_map[offset] = usage;
873+
WRITE_ONCE(si->swap_map[offset], usage);
874874
inc_cluster_info_page(si, si->cluster_info, offset);
875875
unlock_cluster(ci);
876876

@@ -929,12 +929,13 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
929929

930930
scan:
931931
spin_unlock(&si->lock);
932-
while (++offset <= si->highest_bit) {
933-
if (!si->swap_map[offset]) {
932+
while (++offset <= READ_ONCE(si->highest_bit)) {
933+
if (data_race(!si->swap_map[offset])) {
934934
spin_lock(&si->lock);
935935
goto checks;
936936
}
937-
if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
937+
if (vm_swap_full() &&
938+
READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
938939
spin_lock(&si->lock);
939940
goto checks;
940941
}
@@ -946,11 +947,12 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
946947
}
947948
offset = si->lowest_bit;
948949
while (offset < scan_base) {
949-
if (!si->swap_map[offset]) {
950+
if (data_race(!si->swap_map[offset])) {
950951
spin_lock(&si->lock);
951952
goto checks;
952953
}
953-
if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
954+
if (vm_swap_full() &&
955+
READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
954956
spin_lock(&si->lock);
955957
goto checks;
956958
}
@@ -1149,7 +1151,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
11491151
p = swp_swap_info(entry);
11501152
if (!p)
11511153
goto bad_nofile;
1152-
if (!(p->flags & SWP_USED))
1154+
if (data_race(!(p->flags & SWP_USED)))
11531155
goto bad_device;
11541156
offset = swp_offset(entry);
11551157
if (offset >= p->max)
@@ -1175,7 +1177,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
11751177
p = __swap_info_get(entry);
11761178
if (!p)
11771179
goto out;
1178-
if (!p->swap_map[swp_offset(entry)])
1180+
if (data_race(!p->swap_map[swp_offset(entry)]))
11791181
goto bad_free;
11801182
return p;
11811183

@@ -1244,7 +1246,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
12441246
}
12451247

12461248
usage = count | has_cache;
1247-
p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
1249+
if (usage)
1250+
WRITE_ONCE(p->swap_map[offset], usage);
1251+
else
1252+
WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE);
12481253

12491254
return usage;
12501255
}
@@ -1296,7 +1301,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
12961301
goto bad_nofile;
12971302

12981303
rcu_read_lock();
1299-
if (!(si->flags & SWP_VALID))
1304+
if (data_race(!(si->flags & SWP_VALID)))
13001305
goto unlock_out;
13011306
offset = swp_offset(entry);
13021307
if (offset >= si->max)
@@ -3484,7 +3489,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
34843489
} else
34853490
err = -ENOENT; /* unused swap entry */
34863491

3487-
p->swap_map[offset] = count | has_cache;
3492+
WRITE_ONCE(p->swap_map[offset], count | has_cache);
34883493

34893494
unlock_out:
34903495
unlock_cluster_or_swap_info(p, ci);

0 commit comments

Comments
 (0)