Skip to content

Commit 680e029

Browse files
osandov authored and brauner committed
proc/kcore: don't walk list on every read
We maintain a list of memory ranges for /proc/kcore, which usually has 10-20 entries. Currently, every single read from /proc/kcore walks the entire list in order to count the number of entries and compute some offsets. These values only change when the list of memory ranges changes, which is very rare (only when memory is hot(un)plugged). We can cache the values when the list is populated to avoid these redundant walks.

In my benchmark, this reduces the time per read by another 20 nanoseconds on top of the previous change, from 215 nanoseconds per read to 195.

Link: osandov/drgn#106
Signed-off-by: Omar Sandoval <[email protected]>
Link: https://lore.kernel.org/r/8d945558b9c9efe74103a34b7780f1cd90d9ce7f.1731115587.git.osandov@fb.com
Signed-off-by: Christian Brauner <[email protected]>
1 parent c9136fa commit 680e029

File tree

1 file changed

+35
-35
lines changed

1 file changed

+35
-35
lines changed

fs/proc/kcore.c

Lines changed: 35 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,10 @@ static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
6565
#endif
6666

6767
static LIST_HEAD(kclist_head);
68+
static int kcore_nphdr;
69+
static size_t kcore_phdrs_len;
70+
static size_t kcore_notes_len;
71+
static size_t kcore_data_offset;
6872
static DECLARE_RWSEM(kclist_lock);
6973
static int kcore_need_update = 1;
7074

@@ -101,33 +105,32 @@ void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
101105
list_add_tail(&new->list, &kclist_head);
102106
}
103107

104-
static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
105-
size_t *data_offset)
108+
static void update_kcore_size(void)
106109
{
107110
size_t try, size;
108111
struct kcore_list *m;
109112

110-
*nphdr = 1; /* PT_NOTE */
113+
kcore_nphdr = 1; /* PT_NOTE */
111114
size = 0;
112115

113116
list_for_each_entry(m, &kclist_head, list) {
114117
try = kc_vaddr_to_offset((size_t)m->addr + m->size);
115118
if (try > size)
116119
size = try;
117-
*nphdr = *nphdr + 1;
120+
kcore_nphdr++;
118121
}
119122

120-
*phdrs_len = *nphdr * sizeof(struct elf_phdr);
121-
*notes_len = (4 * sizeof(struct elf_note) +
122-
3 * ALIGN(sizeof(CORE_STR), 4) +
123-
VMCOREINFO_NOTE_NAME_BYTES +
124-
ALIGN(sizeof(struct elf_prstatus), 4) +
125-
ALIGN(sizeof(struct elf_prpsinfo), 4) +
126-
ALIGN(arch_task_struct_size, 4) +
127-
ALIGN(vmcoreinfo_size, 4));
128-
*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
129-
*notes_len);
130-
return *data_offset + size;
123+
kcore_phdrs_len = kcore_nphdr * sizeof(struct elf_phdr);
124+
kcore_notes_len = (4 * sizeof(struct elf_note) +
125+
3 * ALIGN(sizeof(CORE_STR), 4) +
126+
VMCOREINFO_NOTE_NAME_BYTES +
127+
ALIGN(sizeof(struct elf_prstatus), 4) +
128+
ALIGN(sizeof(struct elf_prpsinfo), 4) +
129+
ALIGN(arch_task_struct_size, 4) +
130+
ALIGN(vmcoreinfo_size, 4));
131+
kcore_data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + kcore_phdrs_len +
132+
kcore_notes_len);
133+
proc_root_kcore->size = kcore_data_offset + size;
131134
}
132135

133136
#ifdef CONFIG_HIGHMEM
@@ -270,8 +273,6 @@ static int kcore_update_ram(void)
270273
{
271274
LIST_HEAD(list);
272275
LIST_HEAD(garbage);
273-
int nphdr;
274-
size_t phdrs_len, notes_len, data_offset;
275276
struct kcore_list *tmp, *pos;
276277
int ret = 0;
277278

@@ -293,8 +294,7 @@ static int kcore_update_ram(void)
293294
}
294295
list_splice_tail(&list, &kclist_head);
295296

296-
proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
297-
&data_offset);
297+
update_kcore_size();
298298

299299
out:
300300
up_write(&kclist_lock);
@@ -326,12 +326,10 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
326326
struct file *file = iocb->ki_filp;
327327
char *buf = file->private_data;
328328
loff_t *fpos = &iocb->ki_pos;
329-
size_t phdrs_offset, notes_offset, data_offset;
329+
size_t phdrs_offset, notes_offset;
330330
size_t page_offline_frozen = 1;
331-
size_t phdrs_len, notes_len;
332331
struct kcore_list *m;
333332
size_t tsz;
334-
int nphdr;
335333
unsigned long start;
336334
size_t buflen = iov_iter_count(iter);
337335
size_t orig_buflen = buflen;
@@ -344,9 +342,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
344342
*/
345343
page_offline_freeze();
346344

347-
get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
348345
phdrs_offset = sizeof(struct elfhdr);
349-
notes_offset = phdrs_offset + phdrs_len;
346+
notes_offset = phdrs_offset + kcore_phdrs_len;
350347

351348
/* ELF file header. */
352349
if (buflen && *fpos < sizeof(struct elfhdr)) {
@@ -368,7 +365,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
368365
.e_flags = ELF_CORE_EFLAGS,
369366
.e_ehsize = sizeof(struct elfhdr),
370367
.e_phentsize = sizeof(struct elf_phdr),
371-
.e_phnum = nphdr,
368+
.e_phnum = kcore_nphdr,
372369
};
373370

374371
tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
@@ -382,24 +379,25 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
382379
}
383380

384381
/* ELF program headers. */
385-
if (buflen && *fpos < phdrs_offset + phdrs_len) {
382+
if (buflen && *fpos < phdrs_offset + kcore_phdrs_len) {
386383
struct elf_phdr *phdrs, *phdr;
387384

388-
phdrs = kzalloc(phdrs_len, GFP_KERNEL);
385+
phdrs = kzalloc(kcore_phdrs_len, GFP_KERNEL);
389386
if (!phdrs) {
390387
ret = -ENOMEM;
391388
goto out;
392389
}
393390

394391
phdrs[0].p_type = PT_NOTE;
395392
phdrs[0].p_offset = notes_offset;
396-
phdrs[0].p_filesz = notes_len;
393+
phdrs[0].p_filesz = kcore_notes_len;
397394

398395
phdr = &phdrs[1];
399396
list_for_each_entry(m, &kclist_head, list) {
400397
phdr->p_type = PT_LOAD;
401398
phdr->p_flags = PF_R | PF_W | PF_X;
402-
phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
399+
phdr->p_offset = kc_vaddr_to_offset(m->addr)
400+
+ kcore_data_offset;
403401
phdr->p_vaddr = (size_t)m->addr;
404402
if (m->type == KCORE_RAM)
405403
phdr->p_paddr = __pa(m->addr);
@@ -412,7 +410,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
412410
phdr++;
413411
}
414412

415-
tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
413+
tsz = min_t(size_t, buflen,
414+
phdrs_offset + kcore_phdrs_len - *fpos);
416415
if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz,
417416
iter) != tsz) {
418417
kfree(phdrs);
@@ -426,7 +425,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
426425
}
427426

428427
/* ELF note segment. */
429-
if (buflen && *fpos < notes_offset + notes_len) {
428+
if (buflen && *fpos < notes_offset + kcore_notes_len) {
430429
struct elf_prstatus prstatus = {};
431430
struct elf_prpsinfo prpsinfo = {
432431
.pr_sname = 'R',
@@ -438,7 +437,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
438437
strscpy(prpsinfo.pr_psargs, saved_command_line,
439438
sizeof(prpsinfo.pr_psargs));
440439

441-
notes = kzalloc(notes_len, GFP_KERNEL);
440+
notes = kzalloc(kcore_notes_len, GFP_KERNEL);
442441
if (!notes) {
443442
ret = -ENOMEM;
444443
goto out;
@@ -459,9 +458,10 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
459458
*/
460459
append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
461460
vmcoreinfo_data,
462-
min(vmcoreinfo_size, notes_len - i));
461+
min(vmcoreinfo_size, kcore_notes_len - i));
463462

464-
tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
463+
tsz = min_t(size_t, buflen,
464+
notes_offset + kcore_notes_len - *fpos);
465465
if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) {
466466
kfree(notes);
467467
ret = -EFAULT;
@@ -477,7 +477,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
477477
* Check to see if our file offset matches with any of
478478
* the addresses in the elf_phdr on our list.
479479
*/
480-
start = kc_offset_to_vaddr(*fpos - data_offset);
480+
start = kc_offset_to_vaddr(*fpos - kcore_data_offset);
481481
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
482482
tsz = buflen;
483483

0 commit comments

Comments
 (0)