
Commit bcf3d95

benzea authored and jmberg-intel committed
um: refactor TLB update handling
Conceptually, we want the memory mappings to always be up to date and represent whatever is in the TLB. To ensure that, we need to sync them over in the userspace case and for the kernel we need to process the mappings.

The kernel will call flush_tlb_* if page table entries that were valid before become invalid. Unfortunately, this is not the case if entries are added.

As such, change both flush_tlb_* and set_ptes to track the memory range that has to be synchronized. For the kernel, we need to execute a flush_tlb_kern_* immediately but we can wait for the first page fault in case of set_ptes. For userspace, in contrast, we only store that a range of memory needs to be synced and do so whenever we switch to that process.

Signed-off-by: Benjamin Berg <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Johannes Berg <[email protected]>
1 parent 573a446 commit bcf3d95

File tree: 9 files changed, +110 −132 lines
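Before the per-file diffs, here is a minimal standalone sketch of the bookkeeping the commit message describes. It is plain user-space C with a toy struct; mark_sync(), do_sync() and struct sync_range are illustrative stand-ins for um_tlb_mark_sync(), um_tlb_sync() and the new mm_context_t fields, not kernel code:

#include <stdio.h>

/* Toy stand-in for the two fields the patch adds to mm_context_t. */
struct sync_range {
	unsigned long from;
	unsigned long to;	/* to == 0 means "nothing pending" */
};

/* Like um_tlb_mark_sync(): widen the pending range to cover [start, end). */
static void mark_sync(struct sync_range *r, unsigned long start, unsigned long end)
{
	if (!r->to) {
		r->from = start;
		r->to = end;
	} else {
		if (start < r->from)
			r->from = start;
		if (end > r->to)
			r->to = end;
	}
}

/* Like um_tlb_sync(): process the pending range once, then clear it. */
static void do_sync(struct sync_range *r)
{
	if (!r->to)
		return;
	printf("sync mappings for [0x%lx, 0x%lx)\n", r->from, r->to);
	r->from = 0;
	r->to = 0;
}

int main(void)
{
	struct sync_range r = { 0, 0 };

	mark_sync(&r, 0x2000, 0x3000);	/* e.g. set_ptes() on one page */
	mark_sync(&r, 0x1000, 0x2000);	/* e.g. flush_tlb_page() on another */
	do_sync(&r);			/* one sync covers [0x1000, 0x3000) */
	do_sync(&r);			/* no-op, the range was cleared */
	return 0;
}

The only policy difference between the kernel and userspace paths is when do_sync() runs: immediately for kernel ranges, and deferred to the next switch into the process (or the next kernel fault) otherwise.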

arch/um/drivers/ubd_kern.c

Lines changed: 0 additions & 2 deletions
@@ -36,7 +36,6 @@
 #include <linux/vmalloc.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
-#include <asm/tlbflush.h>
 #include <kern_util.h>
 #include "mconsole_kern.h"
 #include <init.h>
@@ -770,7 +769,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 		goto error;
 	}
-	flush_tlb_kernel_vm();
 
 	err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 			      ubd_dev->cow.bitmap_offset,

arch/um/include/asm/mmu.h

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,10 @@
 
 typedef struct mm_context {
 	struct mm_id id;
+
+	/* Address range in need of a TLB sync */
+	unsigned long sync_tlb_range_from;
+	unsigned long sync_tlb_range_to;
 } mm_context_t;
 
 #endif

arch/um/include/asm/pgtable.h

Lines changed: 32 additions & 0 deletions
@@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
 
 #define PFN_PTE_SHIFT	PAGE_SHIFT
 
+static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
+				    unsigned long end)
+{
+	if (!mm->context.sync_tlb_range_to) {
+		mm->context.sync_tlb_range_from = start;
+		mm->context.sync_tlb_range_to = end;
+	} else {
+		if (start < mm->context.sync_tlb_range_from)
+			mm->context.sync_tlb_range_from = start;
+		if (end > mm->context.sync_tlb_range_to)
+			mm->context.sync_tlb_range_to = end;
+	}
+}
+
+#define set_ptes set_ptes
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte, int nr)
+{
+	/* Basically the default implementation */
+	size_t length = nr * PAGE_SIZE;
+
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+	}
+
+	um_tlb_mark_sync(mm, addr, addr + length);
+}
+
 #define __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
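A brief note on the new set_ptes() above: each successive entry must point at the next physical page, so the PTE value is advanced by one page frame (1UL << PFN_PTE_SHIFT) per iteration, and the whole [addr, addr + nr * PAGE_SIZE) range is marked for a later sync in one go. A standalone illustration of the PFN stepping, using a plain integer as a stand-in for pte_t (the frame number and flag bits are made up):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PFN_PTE_SHIFT	PAGE_SHIFT	/* the PFN starts at this bit of the PTE value */

int main(void)
{
	/* stand-in PTE: page frame 0x40 plus some low permission bits */
	unsigned long pte = (0x40UL << PFN_PTE_SHIFT) | 0x7;
	unsigned long ptes[3];
	int nr = 3;			/* map three consecutive pages */
	int i = 0;

	for (;;) {
		ptes[i++] = pte;	/* stand-in for set_pte(ptep, pte) */
		if (--nr == 0)
			break;
		/* advance to the next page frame, keeping the flag bits */
		pte += 1UL << PFN_PTE_SHIFT;
	}

	for (i = 0; i < 3; i++)
		printf("pte[%d] = 0x%lx -> pfn 0x%lx\n",
		       i, ptes[i], ptes[i] >> PFN_PTE_SHIFT);
	return 0;
}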

arch/um/include/asm/tlbflush.h

Lines changed: 37 additions & 9 deletions
@@ -9,23 +9,51 @@
 #include <linux/mm.h>
 
 /*
- * TLB flushing:
+ * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
+ * from the process handling the MM (which can be the kernel itself).
+ *
+ * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
+ * we catch all PTE transitions where memory that was unusable becomes usable.
+ * With flush_tlb_* we can track any memory that becomes unusable, even if a
+ * higher layer of the page table was modified.
+ *
+ * So, we simply track updates using both methods and mark the memory area to
+ * be synced later on. The only special case is that flush_tlb_kern_* needs to
+ * be executed immediately as there is no good synchronization point in that
+ * case. In contrast, in the set_ptes case we can wait for the next kernel
+ * segfault before we do the synchronization.
  *
- * - flush_tlb() flushes the current mm struct TLBs
 * - flush_tlb_all() flushes all processes TLBs
 * - flush_tlb_mm(mm) flushes the specified mm context TLB's
 * - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_kernel_vm() flushes the kernel vm area
 * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 */
 
+extern int um_tlb_sync(struct mm_struct *mm);
+
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
-extern void flush_tlb_kernel_vm(void);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-extern void __flush_tlb_one(unsigned long addr);
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long address)
+{
+	um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	um_tlb_mark_sync(vma->vm_mm, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	um_tlb_mark_sync(&init_mm, start, end);
+
+	/* Kernel needs to be synced immediately */
+	um_tlb_sync(&init_mm);
+}
 
 #endif
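The comment block above boils down to one policy decision per flush hook: user ranges are only recorded and get pushed to the host the next time the process runs, while kernel ranges are recorded and pushed straight away because no such later hook exists. A compact standalone model of that decision follows; mark(), sync_now(), flush_range() and struct pending are illustrative names, not kernel code:

#include <stdbool.h>
#include <stdio.h>

struct pending { unsigned long from, to; };	/* toy mm_context sync range */

static void mark(struct pending *p, unsigned long start, unsigned long end)
{
	if (!p->to) {			/* nothing pending yet */
		p->from = start;
		p->to = end;
		return;
	}
	if (start < p->from)
		p->from = start;
	if (end > p->to)
		p->to = end;
}

/* Toy um_tlb_sync(): push the pending range to the host, then clear it. */
static void sync_now(struct pending *p, const char *when)
{
	if (!p->to)
		return;
	printf("%s: mmap/munmap/mprotect over [0x%lx, 0x%lx)\n", when, p->from, p->to);
	p->from = p->to = 0;
}

/* Policy modelled by the header: record the range; for kernel ranges sync
 * right away, since no later synchronization point is guaranteed. */
static void flush_range(struct pending *p, bool kernel,
			unsigned long start, unsigned long end)
{
	mark(p, start, end);
	if (kernel)
		sync_now(p, "kernel, immediate");	/* flush_tlb_kernel_range() */
	/* user ranges stay pending until the process is scheduled again */
}

int main(void)
{
	struct pending kernel_mm = { 0, 0 }, user_mm = { 0, 0 };

	flush_range(&kernel_mm, true, 0xffff0000, 0xffff2000);
	flush_range(&user_mm, false, 0x400000, 0x402000);
	sync_now(&user_mm, "user, at the next run of the process");
	return 0;
}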

arch/um/include/shared/skas/skas.h

Lines changed: 1 addition & 0 deletions
@@ -16,5 +16,6 @@ extern void handle_syscall(struct uml_pt_regs *regs);
 extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
 extern struct mm_id *current_mm_id(void);
+extern void current_mm_sync(void);
 
 #endif

arch/um/kernel/skas/process.c

Lines changed: 10 additions & 0 deletions
@@ -8,6 +8,8 @@
 #include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>
 
+#include <asm/tlbflush.h>
+
 #include <as-layout.h>
 #include <kern.h>
 #include <os.h>
@@ -58,3 +60,11 @@ struct mm_id *current_mm_id(void)
 
 	return &current->mm->context.id;
 }
+
+void current_mm_sync(void)
+{
+	if (current->mm == NULL)
+		return;
+
+	um_tlb_sync(current->mm);
+}

arch/um/kernel/tlb.c

Lines changed: 12 additions & 118 deletions
@@ -170,14 +170,16 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
 	return ret;
 }
 
-static int fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-			    unsigned long end_addr)
+int um_tlb_sync(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	struct vm_ops ops;
-	unsigned long addr = start_addr, next;
+	unsigned long addr = mm->context.sync_tlb_range_from, next;
 	int ret = 0;
 
+	if (mm->context.sync_tlb_range_to == 0)
+		return 0;
+
 	ops.mm_idp = &mm->context.id;
 	if (mm == &init_mm) {
 		ops.mmap = kern_map;
@@ -191,7 +193,7 @@ static int fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 
 	pgd = pgd_offset(mm, addr);
 	do {
-		next = pgd_addr_end(addr, end_addr);
+		next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
 		if (!pgd_present(*pgd)) {
 			if (pgd_newpage(*pgd)) {
 				ret = ops.unmap(ops.mm_idp, addr,
@@ -200,87 +202,16 @@ static int fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 			}
 		} else
 			ret = update_p4d_range(pgd, addr, next, &ops);
-	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
+	} while (pgd++, addr = next,
+		 ((addr < mm->context.sync_tlb_range_to) && !ret));
 
 	if (ret == -ENOMEM)
 		report_enomem();
 
-	return ret;
-}
-
-static void flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
-{
-	int err;
-
-	err = fix_range_common(&init_mm, start, end);
-
-	if (err)
-		panic("flush_tlb_kernel failed, errno = %d\n", err);
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	struct mm_struct *mm = vma->vm_mm;
-	int r, w, x, prot;
-	struct mm_id *mm_id;
-
-	address &= PAGE_MASK;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto kill;
-
-	p4d = p4d_offset(pgd, address);
-	if (!p4d_present(*p4d))
-		goto kill;
-
-	pud = pud_offset(p4d, address);
-	if (!pud_present(*pud))
-		goto kill;
-
-	pmd = pmd_offset(pud, address);
-	if (!pmd_present(*pmd))
-		goto kill;
-
-	pte = pte_offset_kernel(pmd, address);
-
-	r = pte_read(*pte);
-	w = pte_write(*pte);
-	x = pte_exec(*pte);
-	if (!pte_young(*pte)) {
-		r = 0;
-		w = 0;
-	} else if (!pte_dirty(*pte)) {
-		w = 0;
-	}
-
-	mm_id = &mm->context.id;
-	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
-		(x ? UM_PROT_EXEC : 0));
-	if (pte_newpage(*pte)) {
-		if (pte_present(*pte)) {
-			unsigned long long offset;
-			int fd;
-
-			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
-			map(mm_id, address, PAGE_SIZE, prot, fd, offset);
-		} else
-			unmap(mm_id, address, PAGE_SIZE);
-	} else if (pte_newprot(*pte))
-		protect(mm_id, address, PAGE_SIZE, prot);
-
-	*pte = pte_mkuptodate(*pte);
+	mm->context.sync_tlb_range_from = 0;
+	mm->context.sync_tlb_range_to = 0;
 
-	return;
-
-kill:
-	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
-	force_sig(SIGKILL);
+	return ret;
 }
 
 void flush_tlb_all(void)
@@ -295,48 +226,11 @@ void flush_tlb_all(void)
 	flush_tlb_mm(current->mm);
 }
 
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	flush_tlb_kernel_range_common(start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
-	flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
-	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr)
-{
-	/*
-	 * Don't bother flushing if this address space is about to be
-	 * destroyed.
-	 */
-	if (atomic_read(&mm->mm_users) == 0)
-		return;
-
-	fix_range_common(mm, start_addr, end_addr);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-		     unsigned long end)
-{
-	if (vma->vm_mm == NULL)
-		flush_tlb_kernel_range_common(start, end);
-	else fix_range(vma->vm_mm, start, end);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
 void flush_tlb_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 	VMA_ITERATOR(vmi, mm, 0);
 
 	for_each_vma(vmi, vma)
-		fix_range(mm, vma->vm_start, vma->vm_end);
+		um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
 }

arch/um/kernel/trap.c

Lines changed: 12 additions & 3 deletions
@@ -113,7 +113,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 #if 0
 	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
 #endif
-	flush_tlb_page(vma, address);
+
 out:
 	mmap_read_unlock(mm);
 out_nosemaphore:
@@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 	if (!is_user && regs)
 		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 
-	if (!is_user && (address >= start_vm) && (address < end_vm)) {
-		flush_tlb_kernel_vm();
+	if (!is_user && init_mm.context.sync_tlb_range_to) {
+		/*
+		 * Kernel has pending updates from set_ptes that were not
+		 * flushed yet. Syncing them should fix the pagefault (if not
+		 * we'll get here again and panic).
+		 */
+		err = um_tlb_sync(&init_mm);
+		if (err == -ENOMEM)
+			report_enomem();
+		if (err)
+			panic("Failed to sync kernel TLBs: %d", err);
 		goto out;
 	}
 	else if (current->mm == NULL) {
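The new branch in segv() gives set_ptes() its synchronization point for kernel mappings: a fault in kernel mode is first answered by syncing whatever range is still pending for init_mm and retrying, and only a repeat fault with nothing pending is treated as a real error. A simplified standalone model of that retry logic (illustrative names, not the kernel's actual fault path):

#include <stdio.h>

struct pending { unsigned long from, to; };	/* toy init_mm sync range */

/*
 * Toy kernel-mode segv(): if updates from set_ptes() are still pending,
 * sync them and let the faulting access be retried; if nothing is pending,
 * the fault is genuine (the kernel would panic at this point).
 */
static int kernel_fault(struct pending *p, unsigned long addr)
{
	if (p->to) {
		printf("fault at 0x%lx: syncing pending [0x%lx, 0x%lx), retrying\n",
		       addr, p->from, p->to);
		p->from = p->to = 0;	/* toy um_tlb_sync(&init_mm) */
		return 0;
	}
	printf("fault at 0x%lx: nothing pending, genuine fault\n", addr);
	return -1;
}

int main(void)
{
	struct pending init_mm_range = { 0xffff0000, 0xffff2000 };

	kernel_fault(&init_mm_range, 0xffff1000);	/* fixed by the pending sync */
	kernel_fault(&init_mm_range, 0xffff1000);	/* still faulting: real error */
	return 0;
}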

arch/um/os-Linux/skas/process.c

Lines changed: 2 additions & 0 deletions
@@ -347,6 +347,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 	while (1) {
 		time_travel_print_bc_msg();
 
+		current_mm_sync();
+
 		/* Flush out any pending syscalls */
 		err = syscall_stub_flush(current_mm_id());
 		if (err) {
