 #include <linux/err.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <linux/time_namespace.h>

 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];

 enum vvar_pages {
 	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
 	VVAR_NR_PAGES,
 };

 #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)

-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
 /*
  * The vDSO data page.
  */
@@ -42,83 +41,228 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;

-static int __init vdso_init(void)
+struct __vdso_info {
+	const char *name;
+	const char *vdso_code_start;
+	const char *vdso_code_end;
+	unsigned long vdso_pages;
+	/* Data Mapping */
+	struct vm_special_mapping *dm;
+	/* Code Mapping */
+	struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+	.name = "vdso",
+	.vdso_code_start = vdso_start,
+	.vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
+{
+	current->mm->context.vdso = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
+static int __init __vdso_init(void)
 {
 	unsigned int i;
+	struct page **vdso_pagelist;
+	unsigned long pfn;

-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-	vdso_pagelist =
-		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
-	if (unlikely(vdso_pagelist == NULL)) {
-		pr_err("vdso: pagelist allocation failed\n");
-		return -ENOMEM;
+	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
 	}

-	for (i = 0; i < vdso_pages; i++) {
-		struct page *pg;
+	vdso_info.vdso_pages = (
+		vdso_info.vdso_code_end -
+		vdso_info.vdso_code_start) >>
+		PAGE_SHIFT;
+
+	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+				sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+	for (i = 0; i < vdso_info.vdso_pages; i++)
+		vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+	vdso_info.cm->pages = vdso_pagelist;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);

-		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-		vdso_pagelist[i] = pg;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, vdso_info.dm))
+			zap_page_range(vma, vma->vm_start, size);
 	}
-	vdso_pagelist[i] = virt_to_page(vdso_data);

+	mmap_read_unlock(mm);
 	return 0;
 }
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = sym_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = sym_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+	RV_VDSO_MAP_VVAR,
+	RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+	[RV_VDSO_MAP_VVAR] = {
+		.name   = "[vvar]",
+		.fault = vvar_fault,
+	},
+	[RV_VDSO_MAP_VDSO] = {
+		.name   = "[vdso]",
+		.mremap = vdso_mremap,
+	},
+};
+
+static int __init vdso_init(void)
+{
+	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+	return __vdso_init();
+}
 arch_initcall(vdso_init);

-int arch_setup_additional_pages(struct linux_binprm *bprm,
-	int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+				    struct linux_binprm *bprm,
+				    int uses_interp)
 {
-	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_len;
-	int ret;
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;

 	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

-	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + VVAR_SIZE;

-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
-
-	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
-		ret = vdso_base;
-		goto end;
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
 	}

-	mm->context.vdso = NULL;
-	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
-		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-	if (unlikely(ret))
-		goto end;
+	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+	if (IS_ERR(ret))
+		goto up_fail;

+	vdso_base += VVAR_SIZE;
+	mm->context.vdso = (void *)vdso_base;
 	ret =
-		install_special_mapping(mm, vdso_base + VVAR_SIZE,
-			vdso_pages << PAGE_SHIFT,
+		_install_special_mapping(mm, vdso_base, vdso_text_len,
 			(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-			vdso_pagelist);
+			vdso_info.cm);

-	if (unlikely(ret))
-		goto end;
+	if (IS_ERR(ret))
+		goto up_fail;

-	/*
-	 * Put vDSO base into mm struct. We need to do this before calling
-	 * install_special_mapping or the perf counter mmap tracking code
-	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
-	 */
-	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
+	return 0;

-end:
-	mmap_write_unlock(mm);
-	return ret;
+up_fail:
+	mm->context.vdso = NULL;
+	return PTR_ERR(ret);
 }

-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
-		return "[vdso]";
-	if (vma->vm_mm && (vma->vm_start ==
-			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
-		return "[vdso_data]";
-	return NULL;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	ret = __setup_additional_pages(mm, bprm, uses_interp);
+	mmap_write_unlock(mm);
+
+	return ret;
 }
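
For context, not part of the patch itself: with this layout the [vvar] data pages are installed at vdso_base and the vDSO code pages sit directly above them at vdso_base + VVAR_SIZE, which is where mm->context.vdso now points. A minimal userspace sketch, relying only on the "[vvar]" and "[vdso]" names set in rv_vdso_maps[], to observe the resulting layout via /proc/self/maps:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;

	/* Print the special mappings installed by __setup_additional_pages(). */
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "[vvar]") || strstr(line, "[vdso]"))
			fputs(line, stdout);
	}

	fclose(f);
	return 0;
}

On a kernel carrying this change, the [vvar] range should appear immediately below [vdso], since the data pages are now mapped in front of the vDSO code.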