
Commit dffe11e

Tong Tiangen authored and palmer-dabbelt committed
riscv/vdso: Add support for time namespaces
Implement generic vdso time namespace support, which also enables time
namespaces for riscv. This is quite similar to what arm64 does.

selftest/timens test result:

1..10
ok 1 Passed for CLOCK_BOOTTIME (syscall)
ok 2 Passed for CLOCK_BOOTTIME (vdso)
ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
ok 5 Passed for CLOCK_MONOTONIC (syscall)
ok 6 Passed for CLOCK_MONOTONIC (vdso)
ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
# Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0

Signed-off-by: Tong Tiangen <[email protected]>
Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 8edab02 commit dffe11e
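For context on what the selftest above exercises: a process unshares CLONE_NEWTIME, writes a clock offset to /proc/self/timens_offsets, and a forked child then reads the clock; with this support in place the child's clock_gettime() is normally resolved through the vDSO and picks up the namespace's vvar page. Below is a minimal userspace sketch of that flow (not the in-tree selftest; the one-day offset is arbitrary, error handling is minimal, and CAP_SYS_ADMIN is required).

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif

int main(void)
{
	struct timespec ts;
	int fd;

	/* Create a new time namespace; children created afterwards join it. */
	if (unshare(CLONE_NEWTIME)) {
		perror("unshare(CLONE_NEWTIME)");
		return 1;
	}

	/* Shift CLOCK_MONOTONIC by one day in the new namespace. */
	fd = open("/proc/self/timens_offsets", O_WRONLY);
	if (fd < 0 || write(fd, "monotonic 86400 0\n", 18) != 18) {
		perror("timens_offsets");
		return 1;
	}
	close(fd);

	if (fork() == 0) {
		/* The child runs in the new namespace; clock_gettime() is
		 * normally served by the vDSO and sees the shifted clock. */
		clock_gettime(CLOCK_MONOTONIC, &ts);
		printf("child CLOCK_MONOTONIC: %lld.%09ld\n",
		       (long long)ts.tv_sec, ts.tv_nsec);
		exit(0);
	}
	wait(NULL);
	return 0;
}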

File tree

6 files changed, +211 -54 lines changed


arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -62,6 +62,7 @@ config RISCV
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL

arch/riscv/include/asm/page.h

Lines changed: 2 additions & 0 deletions
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
 #define page_to_bus(page)	(page_to_phys(page))
 #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))

+#define sym_to_pfn(x)	__phys_to_pfn(__pa_symbol(x))
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) \
 	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))

arch/riscv/include/asm/vdso.h

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
  */
 #ifdef CONFIG_MMU

-#define __VVAR_PAGES    1
+#define __VVAR_PAGES    2

 #ifndef __ASSEMBLY__
 #include <generated/vdso-offsets.h>

arch/riscv/include/asm/vdso/gettimeofday.h

Lines changed: 7 additions & 0 deletions
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
 	return _vdso_data;
 }

+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return _timens_data;
+}
+#endif
 #endif /* !__ASSEMBLY__ */

 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */

arch/riscv/kernel/vdso.c

Lines changed: 197 additions & 53 deletions
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <linux/time_namespace.h>

 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];

 enum vvar_pages {
 	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
 	VVAR_NR_PAGES,
 };

 #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)

-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
 /*
  * The vDSO data page.
  */
@@ -42,83 +41,228 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;

-static int __init vdso_init(void)
+struct __vdso_info {
+	const char *name;
+	const char *vdso_code_start;
+	const char *vdso_code_end;
+	unsigned long vdso_pages;
+	/* Data Mapping */
+	struct vm_special_mapping *dm;
+	/* Code Mapping */
+	struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+	.name = "vdso",
+	.vdso_code_start = vdso_start,
+	.vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
+{
+	current->mm->context.vdso = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
+static int __init __vdso_init(void)
 {
 	unsigned int i;
+	struct page **vdso_pagelist;
+	unsigned long pfn;

-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-	vdso_pagelist =
-		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
-	if (unlikely(vdso_pagelist == NULL)) {
-		pr_err("vdso: pagelist allocation failed\n");
-		return -ENOMEM;
+	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
 	}

-	for (i = 0; i < vdso_pages; i++) {
-		struct page *pg;
+	vdso_info.vdso_pages = (
+		vdso_info.vdso_code_end -
+		vdso_info.vdso_code_start) >>
+		PAGE_SHIFT;
+
+	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+				sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+	for (i = 0; i < vdso_info.vdso_pages; i++)
+		vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+	vdso_info.cm->pages = vdso_pagelist;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);

-		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-		vdso_pagelist[i] = pg;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, vdso_info.dm))
+			zap_page_range(vma, vma->vm_start, size);
 	}
-	vdso_pagelist[i] = virt_to_page(vdso_data);

+	mmap_read_unlock(mm);
 	return 0;
 }
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = sym_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = sym_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+	RV_VDSO_MAP_VVAR,
+	RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+	[RV_VDSO_MAP_VVAR] = {
+		.name   = "[vvar]",
+		.fault = vvar_fault,
+	},
+	[RV_VDSO_MAP_VDSO] = {
+		.name   = "[vdso]",
+		.mremap = vdso_mremap,
+	},
+};
+
+static int __init vdso_init(void)
+{
+	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+	return __vdso_init();
+}
 arch_initcall(vdso_init);

-int arch_setup_additional_pages(struct linux_binprm *bprm,
-	int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+				    struct linux_binprm *bprm,
+				    int uses_interp)
 {
-	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_len;
-	int ret;
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;

 	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

-	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + VVAR_SIZE;

-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
-
-	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
-		ret = vdso_base;
-		goto end;
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
 	}

-	mm->context.vdso = NULL;
-	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
-		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-	if (unlikely(ret))
-		goto end;
+	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+	if (IS_ERR(ret))
+		goto up_fail;

+	vdso_base += VVAR_SIZE;
+	mm->context.vdso = (void *)vdso_base;
 	ret =
-		install_special_mapping(mm, vdso_base + VVAR_SIZE,
-			vdso_pages << PAGE_SHIFT,
+	   _install_special_mapping(mm, vdso_base, vdso_text_len,
 		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-		vdso_pagelist);
+		vdso_info.cm);

-	if (unlikely(ret))
-		goto end;
+	if (IS_ERR(ret))
+		goto up_fail;

-	/*
-	 * Put vDSO base into mm struct. We need to do this before calling
-	 * install_special_mapping or the perf counter mmap tracking code
-	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
-	 */
-	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
+	return 0;

-end:
-	mmap_write_unlock(mm);
-	return ret;
+up_fail:
+	mm->context.vdso = NULL;
+	return PTR_ERR(ret);
 }

-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
-		return "[vdso]";
-	if (vma->vm_mm && (vma->vm_start ==
-			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
-		return "[vdso_data]";
-	return NULL;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	ret = __setup_additional_pages(mm, bprm, uses_interp);
+	mmap_write_unlock(mm);
+
+	return ret;
 }

arch/riscv/kernel/vdso/vdso.lds.S

Lines changed: 3 additions & 0 deletions
@@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
 SECTIONS
 {
 	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
 	. = SIZEOF_HEADERS;

 	.hash		: { *(.hash) }		:text
