Skip to content

Commit 4972226

Browse files
committed
Merge patch series "proc/kcore: performance optimizations"
Omar Sandoval <[email protected]> says:

The performance of /proc/kcore reads has been showing up as a bottleneck for drgn. drgn scripts often spend ~25% of their time in the kernel reading from /proc/kcore.

A lot of this overhead comes from silly inefficiencies. This patch series fixes the low-hanging fruit. The fixes are all fairly small and straightforward. The result is a 25% improvement in read latency in micro-benchmarks (from ~235 nanoseconds to ~175) and a 15% improvement in execution time for real-world drgn scripts.

Since I have a stake in /proc/kcore and have modified it several times, the final patch volunteers me to maintain it.

* patches from https://lore.kernel.org/r/[email protected]:
  MAINTAINERS: add me as /proc/kcore maintainer
  proc/kcore: use percpu_rw_semaphore for kclist_lock
  proc/kcore: don't walk list on every read
  proc/kcore: mark proc entry as permanent

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
2 parents 40384c8 + 4620cb8 commit 4972226

File tree

2 files changed

+48
-40
lines changed

2 files changed

+48
-40
lines changed

MAINTAINERS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12385,6 +12385,13 @@ F: Documentation/kbuild/kconfig*
1238512385
F: scripts/Kconfig.include
1238612386
F: scripts/kconfig/
1238712387

12388+
KCORE
12389+
M: Omar Sandoval <[email protected]>
12390+
12391+
S: Maintained
12392+
F: fs/proc/kcore.c
12393+
F: include/linux/kcore.h
12394+
1238812395
KCOV
1238912396
R: Dmitry Vyukov <[email protected]>
1239012397
R: Andrey Konovalov <[email protected]>

fs/proc/kcore.c

Lines changed: 41 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,11 @@ static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
6565
#endif
6666

6767
static LIST_HEAD(kclist_head);
68-
static DECLARE_RWSEM(kclist_lock);
68+
static int kcore_nphdr;
69+
static size_t kcore_phdrs_len;
70+
static size_t kcore_notes_len;
71+
static size_t kcore_data_offset;
72+
DEFINE_STATIC_PERCPU_RWSEM(kclist_lock);
6973
static int kcore_need_update = 1;
7074

7175
/*
@@ -101,33 +105,32 @@ void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
101105
list_add_tail(&new->list, &kclist_head);
102106
}
103107

104-
static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
105-
size_t *data_offset)
108+
static void update_kcore_size(void)
106109
{
107110
size_t try, size;
108111
struct kcore_list *m;
109112

110-
*nphdr = 1; /* PT_NOTE */
113+
kcore_nphdr = 1; /* PT_NOTE */
111114
size = 0;
112115

113116
list_for_each_entry(m, &kclist_head, list) {
114117
try = kc_vaddr_to_offset((size_t)m->addr + m->size);
115118
if (try > size)
116119
size = try;
117-
*nphdr = *nphdr + 1;
120+
kcore_nphdr++;
118121
}
119122

120-
*phdrs_len = *nphdr * sizeof(struct elf_phdr);
121-
*notes_len = (4 * sizeof(struct elf_note) +
122-
3 * ALIGN(sizeof(CORE_STR), 4) +
123-
VMCOREINFO_NOTE_NAME_BYTES +
124-
ALIGN(sizeof(struct elf_prstatus), 4) +
125-
ALIGN(sizeof(struct elf_prpsinfo), 4) +
126-
ALIGN(arch_task_struct_size, 4) +
127-
ALIGN(vmcoreinfo_size, 4));
128-
*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
129-
*notes_len);
130-
return *data_offset + size;
123+
kcore_phdrs_len = kcore_nphdr * sizeof(struct elf_phdr);
124+
kcore_notes_len = (4 * sizeof(struct elf_note) +
125+
3 * ALIGN(sizeof(CORE_STR), 4) +
126+
VMCOREINFO_NOTE_NAME_BYTES +
127+
ALIGN(sizeof(struct elf_prstatus), 4) +
128+
ALIGN(sizeof(struct elf_prpsinfo), 4) +
129+
ALIGN(arch_task_struct_size, 4) +
130+
ALIGN(vmcoreinfo_size, 4));
131+
kcore_data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + kcore_phdrs_len +
132+
kcore_notes_len);
133+
proc_root_kcore->size = kcore_data_offset + size;
131134
}
132135

133136
#ifdef CONFIG_HIGHMEM
@@ -270,12 +273,10 @@ static int kcore_update_ram(void)
270273
{
271274
LIST_HEAD(list);
272275
LIST_HEAD(garbage);
273-
int nphdr;
274-
size_t phdrs_len, notes_len, data_offset;
275276
struct kcore_list *tmp, *pos;
276277
int ret = 0;
277278

278-
down_write(&kclist_lock);
279+
percpu_down_write(&kclist_lock);
279280
if (!xchg(&kcore_need_update, 0))
280281
goto out;
281282

@@ -293,11 +294,10 @@ static int kcore_update_ram(void)
293294
}
294295
list_splice_tail(&list, &kclist_head);
295296

296-
proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
297-
&data_offset);
297+
update_kcore_size();
298298

299299
out:
300-
up_write(&kclist_lock);
300+
percpu_up_write(&kclist_lock);
301301
list_for_each_entry_safe(pos, tmp, &garbage, list) {
302302
list_del(&pos->list);
303303
kfree(pos);
@@ -326,27 +326,24 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
326326
struct file *file = iocb->ki_filp;
327327
char *buf = file->private_data;
328328
loff_t *fpos = &iocb->ki_pos;
329-
size_t phdrs_offset, notes_offset, data_offset;
329+
size_t phdrs_offset, notes_offset;
330330
size_t page_offline_frozen = 1;
331-
size_t phdrs_len, notes_len;
332331
struct kcore_list *m;
333332
size_t tsz;
334-
int nphdr;
335333
unsigned long start;
336334
size_t buflen = iov_iter_count(iter);
337335
size_t orig_buflen = buflen;
338336
int ret = 0;
339337

340-
down_read(&kclist_lock);
338+
percpu_down_read(&kclist_lock);
341339
/*
342340
* Don't race against drivers that set PageOffline() and expect no
343341
* further page access.
344342
*/
345343
page_offline_freeze();
346344

347-
get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
348345
phdrs_offset = sizeof(struct elfhdr);
349-
notes_offset = phdrs_offset + phdrs_len;
346+
notes_offset = phdrs_offset + kcore_phdrs_len;
350347

351348
/* ELF file header. */
352349
if (buflen && *fpos < sizeof(struct elfhdr)) {
@@ -368,7 +365,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
368365
.e_flags = ELF_CORE_EFLAGS,
369366
.e_ehsize = sizeof(struct elfhdr),
370367
.e_phentsize = sizeof(struct elf_phdr),
371-
.e_phnum = nphdr,
368+
.e_phnum = kcore_nphdr,
372369
};
373370

374371
tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
@@ -382,24 +379,25 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
382379
}
383380

384381
/* ELF program headers. */
385-
if (buflen && *fpos < phdrs_offset + phdrs_len) {
382+
if (buflen && *fpos < phdrs_offset + kcore_phdrs_len) {
386383
struct elf_phdr *phdrs, *phdr;
387384

388-
phdrs = kzalloc(phdrs_len, GFP_KERNEL);
385+
phdrs = kzalloc(kcore_phdrs_len, GFP_KERNEL);
389386
if (!phdrs) {
390387
ret = -ENOMEM;
391388
goto out;
392389
}
393390

394391
phdrs[0].p_type = PT_NOTE;
395392
phdrs[0].p_offset = notes_offset;
396-
phdrs[0].p_filesz = notes_len;
393+
phdrs[0].p_filesz = kcore_notes_len;
397394

398395
phdr = &phdrs[1];
399396
list_for_each_entry(m, &kclist_head, list) {
400397
phdr->p_type = PT_LOAD;
401398
phdr->p_flags = PF_R | PF_W | PF_X;
402-
phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
399+
phdr->p_offset = kc_vaddr_to_offset(m->addr)
400+
+ kcore_data_offset;
403401
phdr->p_vaddr = (size_t)m->addr;
404402
if (m->type == KCORE_RAM)
405403
phdr->p_paddr = __pa(m->addr);
@@ -412,7 +410,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
412410
phdr++;
413411
}
414412

415-
tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
413+
tsz = min_t(size_t, buflen,
414+
phdrs_offset + kcore_phdrs_len - *fpos);
416415
if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz,
417416
iter) != tsz) {
418417
kfree(phdrs);
@@ -426,7 +425,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
426425
}
427426

428427
/* ELF note segment. */
429-
if (buflen && *fpos < notes_offset + notes_len) {
428+
if (buflen && *fpos < notes_offset + kcore_notes_len) {
430429
struct elf_prstatus prstatus = {};
431430
struct elf_prpsinfo prpsinfo = {
432431
.pr_sname = 'R',
@@ -438,7 +437,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
438437
strscpy(prpsinfo.pr_psargs, saved_command_line,
439438
sizeof(prpsinfo.pr_psargs));
440439

441-
notes = kzalloc(notes_len, GFP_KERNEL);
440+
notes = kzalloc(kcore_notes_len, GFP_KERNEL);
442441
if (!notes) {
443442
ret = -ENOMEM;
444443
goto out;
@@ -459,9 +458,10 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
459458
*/
460459
append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
461460
vmcoreinfo_data,
462-
min(vmcoreinfo_size, notes_len - i));
461+
min(vmcoreinfo_size, kcore_notes_len - i));
463462

464-
tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
463+
tsz = min_t(size_t, buflen,
464+
notes_offset + kcore_notes_len - *fpos);
465465
if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) {
466466
kfree(notes);
467467
ret = -EFAULT;
@@ -477,7 +477,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
477477
* Check to see if our file offset matches with any of
478478
* the addresses in the elf_phdr on our list.
479479
*/
480-
start = kc_offset_to_vaddr(*fpos - data_offset);
480+
start = kc_offset_to_vaddr(*fpos - kcore_data_offset);
481481
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
482482
tsz = buflen;
483483

@@ -626,7 +626,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
626626

627627
out:
628628
page_offline_thaw();
629-
up_read(&kclist_lock);
629+
percpu_up_read(&kclist_lock);
630630
if (ret)
631631
return ret;
632632
return orig_buflen - buflen;
@@ -663,6 +663,7 @@ static int release_kcore(struct inode *inode, struct file *file)
663663
}
664664

665665
static const struct proc_ops kcore_proc_ops = {
666+
.proc_flags = PROC_ENTRY_PERMANENT,
666667
.proc_read_iter = read_kcore_iter,
667668
.proc_open = open_kcore,
668669
.proc_release = release_kcore,

0 commit comments

Comments
 (0)