Commit adf12a3

Merge tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
Pull perf fixes from Thomas Gleixner:
 "Perf fixes for perf_mmap() reference counting to prevent potential
  reference count leaks which are caused by:

   - VMA splits, which change the offset or size of a mapping, which
     causes perf_mmap_close() to ignore the unmap or unmap the wrong
     buffer.

   - Several internal issues of perf_mmap(), which can cause reference
     count leaks in the perf mmap, corrupt accounting or cause leaks in
     perf drivers.

  The main fix is to prevent VMA splits by implementing the
  [may_]split() callback for vm operations. The other issues are
  addressed by rearranging code, early returns on failure and
  invocation of cleanups.

  Also provide a selftest to validate the fixes.

  The reference counting should be converted to refcount_t, but that
  requires larger refactoring of the code and will be done once these
  fixes are upstream"

* tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git:
  selftests/perf_events: Add a mmap() correctness test
  perf/core: Prevent VMA split of buffer mappings
  perf/core: Handle buffer mapping fail correctly in perf_mmap()
  perf/core: Exit early on perf_mmap() fail
  perf/core: Don't leak AUX buffer refcount on allocation failure
  perf/core: Preserve AUX buffer allocation failure result
2 parents 8466d39 + 084d2ac commit adf12a3
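As background (not part of the commit), here is a minimal userspace sketch of the situation the new [may_]split() callback forbids: a perf ring buffer is mapped as a single VMA, and a partial munmap() would split that VMA so that perf_mmap_close() later sees an offset and size that no longer match the original mapping. The dummy software event and buffer size below are illustrative assumptions, and any mappable event behaves the same way; on a kernel with these fixes the partial munmap() is expected to fail with EINVAL, whereas older kernels allowed the split and could leak a buffer reference.

/* Sketch only: open a dummy perf event, map its ring buffer, then attempt
 * the kind of partial unmap that would split the VMA. Event choice and
 * buffer size are assumptions for illustration; error handling is minimal.
 */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long page = sysconf(_SC_PAGESIZE);
	size_t rb_size = (1 + 4) * page;	/* user page + 2^2 data pages */
	void *rb;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;	/* assumed mappable on most systems */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* The ring buffer (user page + data pages) lives in one VMA. */
	rb = mmap(NULL, rb_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (rb == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * Unmapping only the first page would split the VMA, leaving
	 * perf_mmap_close() with a non-matching offset/size. With the
	 * may_split() callback this is rejected with EINVAL instead of
	 * silently leaking a reference.
	 */
	if (munmap(rb, page) != 0)
		perror("partial munmap rejected (expected on fixed kernels)");

	munmap(rb, rb_size);
	close(fd);
	return 0;
}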

File tree: 4 files changed, +266 -9 lines

  kernel/events/core.c
  tools/testing/selftests/perf_events/.gitignore
  tools/testing/selftests/perf_events/Makefile
  tools/testing/selftests/perf_events/mmap.c

kernel/events/core.c

Lines changed: 28 additions & 8 deletions
@@ -6842,10 +6842,20 @@ static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf)
 	return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS;
 }
 
+static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	/*
+	 * Forbid splitting perf mappings to prevent refcount leaks due to
+	 * the resulting non-matching offsets and sizes. See open()/close().
+	 */
+	return -EINVAL;
+}
+
 static const struct vm_operations_struct perf_mmap_vmops = {
 	.open		= perf_mmap_open,
 	.close		= perf_mmap_close, /* non mergeable */
 	.pfn_mkwrite	= perf_mmap_pfn_mkwrite,
+	.may_split	= perf_mmap_may_split,
 };
 
 static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
@@ -7051,8 +7061,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 			ret = 0;
 			goto unlock;
 		}
-
-		atomic_set(&rb->aux_mmap_count, 1);
 	}
 
 	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
@@ -7115,43 +7123,55 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		perf_event_update_time(event);
 		perf_event_init_userpage(event);
 		perf_event_update_userpage(event);
+		ret = 0;
 	} else {
 		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
 				   event->attr.aux_watermark, flags);
-		if (!ret)
+		if (!ret) {
+			atomic_set(&rb->aux_mmap_count, 1);
 			rb->aux_mmap_locked = extra;
+		}
 	}
 
-	ret = 0;
-
 unlock:
 	if (!ret) {
 		atomic_long_add(user_extra, &user->locked_vm);
 		atomic64_add(extra, &vma->vm_mm->pinned_vm);
 
 		atomic_inc(&event->mmap_count);
 	} else if (rb) {
+		/* AUX allocation failed */
 		atomic_dec(&rb->mmap_count);
 	}
 aux_unlock:
 	if (aux_mutex)
 		mutex_unlock(aux_mutex);
 	mutex_unlock(&event->mmap_mutex);
 
+	if (ret)
+		return ret;
+
 	/*
	 * Since pinned accounting is per vm we cannot allow fork() to copy our
	 * vma.
	 */
 	vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
 	vma->vm_ops = &perf_mmap_vmops;
 
-	if (!ret)
-		ret = map_range(rb, vma);
-
 	mapped = get_mapped(event, event_mapped);
 	if (mapped)
 		mapped(event, vma->vm_mm);
 
+	/*
+	 * Try to map it into the page table. On fail, invoke
+	 * perf_mmap_close() to undo the above, as the callsite expects
+	 * full cleanup in this case and therefore does not invoke
+	 * vmops::close().
+	 */
+	ret = map_range(rb, vma);
+	if (ret)
+		perf_mmap_close(vma);
+
 	return ret;
 }
 

tools/testing/selftests/perf_events/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -2,3 +2,4 @@
 sigtrap_threads
 remove_on_exec
 watermark_signal
+mmap

tools/testing/selftests/perf_events/Makefile

Lines changed: 1 addition & 1 deletion
@@ -2,5 +2,5 @@
 CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
 LDFLAGS += -lpthread
 
-TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap
 include ../lib.mk
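With mmap added to TEST_GEN_PROGS, the new test builds alongside the other perf_events selftests. As a usage note (standard kselftest workflow rather than part of the commit), it can typically be run with the rest of the suite via "make -C tools/testing/selftests TARGETS=perf_events run_tests" or executed directly as the generated mmap binary; the fixture skips itself when no mappable perf event is found or when perf_event_open() permissions are insufficient, and the AUX variant additionally skips when no AUX-capable event exists.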
tools/testing/selftests/perf_events/mmap.c

Lines changed: 236 additions & 0 deletions

@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <dirent.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#include <linux/perf_event.h>
+
+#include "../kselftest_harness.h"
+
+#define RB_SIZE		0x3000
+#define AUX_SIZE	0x10000
+#define AUX_OFFS	0x4000
+
+#define HOLE_SIZE	0x1000
+
+/* Reserve space for rb, aux with space for shrink-beyond-vma testing. */
+#define REGION_SIZE	(2 * RB_SIZE + 2 * AUX_SIZE)
+#define REGION_AUX_OFFS	(2 * RB_SIZE)
+
+#define MAP_BASE	1
+#define MAP_AUX		2
+
+#define EVENT_SRC_DIR	"/sys/bus/event_source/devices"
+
+FIXTURE(perf_mmap)
+{
+	int	fd;
+	void	*ptr;
+	void	*region;
+};
+
+FIXTURE_VARIANT(perf_mmap)
+{
+	bool	aux;
+	unsigned long	ptr_size;
+};
+
+FIXTURE_VARIANT_ADD(perf_mmap, rb)
+{
+	.aux		= false,
+	.ptr_size	= RB_SIZE,
+};
+
+FIXTURE_VARIANT_ADD(perf_mmap, aux)
+{
+	.aux		= true,
+	.ptr_size	= AUX_SIZE,
+};
+
+static bool read_event_type(struct dirent *dent, __u32 *type)
+{
+	char typefn[512];
+	FILE *fp;
+	int res;
+
+	snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name);
+	fp = fopen(typefn, "r");
+	if (!fp)
+		return false;
+
+	res = fscanf(fp, "%u", type);
+	fclose(fp);
+	return res > 0;
+}
+
+FIXTURE_SETUP(perf_mmap)
+{
+	struct perf_event_attr attr = {
+		.size		= sizeof(attr),
+		.disabled	= 1,
+		.exclude_kernel	= 1,
+		.exclude_hv	= 1,
+	};
+	struct perf_event_attr attr_ok = {};
+	unsigned int eacces = 0, map = 0;
+	struct perf_event_mmap_page *rb;
+	struct dirent *dent;
+	void *aux, *region;
+	DIR *dir;
+
+	self->ptr = NULL;
+
+	dir = opendir(EVENT_SRC_DIR);
+	if (!dir)
+		SKIP(return, "perf not available.");
+
+	region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+	ASSERT_NE(region, MAP_FAILED);
+	self->region = region;
+
+	// Try to find a suitable event on this system
+	while ((dent = readdir(dir))) {
+		int fd;
+
+		if (!read_event_type(dent, &attr.type))
+			continue;
+
+		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+		if (fd < 0) {
+			if (errno == EACCES)
+				eacces++;
+			continue;
+		}
+
+		// Check whether the event supports mmap()
+		rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
+		if (rb == MAP_FAILED) {
+			close(fd);
+			continue;
+		}
+
+		if (!map) {
+			// Save the event in case that no AUX capable event is found
+			attr_ok = attr;
+			map = MAP_BASE;
+		}
+
+		if (!variant->aux)
+			continue;
+
+		rb->aux_offset = AUX_OFFS;
+		rb->aux_size = AUX_SIZE;
+
+		// Check whether it supports a AUX buffer
+		aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_FIXED, fd, AUX_OFFS);
+		if (aux == MAP_FAILED) {
+			munmap(rb, RB_SIZE);
+			close(fd);
+			continue;
+		}
+
+		attr_ok = attr;
+		map = MAP_AUX;
+		munmap(aux, AUX_SIZE);
+		munmap(rb, RB_SIZE);
+		close(fd);
+		break;
+	}
+	closedir(dir);
+
+	if (!map) {
+		if (!eacces)
+			SKIP(return, "No mappable perf event found.");
+		else
+			SKIP(return, "No permissions for perf_event_open()");
+	}
+
+	self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0);
+	ASSERT_NE(self->fd, -1);
+
+	rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0);
+	ASSERT_NE(rb, MAP_FAILED);
+
+	if (!variant->aux) {
+		self->ptr = rb;
+		return;
+	}
+
+	if (map != MAP_AUX)
+		SKIP(return, "No AUX event found.");
+
+	rb->aux_offset = AUX_OFFS;
+	rb->aux_size = AUX_SIZE;
+	aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE,
+		   MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS);
+	ASSERT_NE(aux, MAP_FAILED);
+	self->ptr = aux;
+}
+
+FIXTURE_TEARDOWN(perf_mmap)
+{
+	ASSERT_EQ(munmap(self->region, REGION_SIZE), 0);
+	if (self->fd != -1)
+		ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(perf_mmap, remap)
+{
+	void *tmp, *ptr = self->ptr;
+	unsigned long size = variant->ptr_size;
+
+	// Test the invalid remaps
+	ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+	ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+	ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED);
+	// Shrink the end of the mapping such that we only unmap past end of the VMA,
+	// which should succeed and poke a hole into the PROT_NONE region
+	ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+
+	// Remap the whole buffer to a new address
+	tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(tmp, MAP_FAILED);
+
+	// Try splitting offset 1 hole size into VMA, this should fail
+	ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE,
+			 MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED);
+	// Remapping the whole thing should succeed fine
+	ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp);
+	ASSERT_EQ(ptr, tmp);
+	ASSERT_EQ(munmap(tmp, size), 0);
+}
+
+TEST_F(perf_mmap, unmap)
+{
+	unsigned long size = variant->ptr_size;
+
+	// Try to poke holes into the mappings
+	ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0);
+	ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0);
+	ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0);
+}
+
+TEST_F(perf_mmap, map)
+{
+	unsigned long size = variant->ptr_size;
+
+	// Try to poke holes into the mappings by mapping anonymous memory over it
+	ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+	ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+	ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+}
+
+TEST_HARNESS_MAIN
