@@ -77,8 +77,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
77
77
copy .mode = 0 ;
78
78
79
79
r = ioctl (uffd , UFFDIO_COPY , & copy );
80
- if (r == -1 ) {
81
- pr_info ("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n" ,
80
+ /*
81
+ * When multiple vCPU threads fault on a single page and there are
82
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
83
+ * will fail with EEXIST: handle that case without signaling an
84
+ * error.
85
+ *
86
+ * Note that this also suppresses any EEXISTs occurring from,
87
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
88
+ * happens here, but a realistic VMM might potentially maintain
89
+ * some external state to correctly surface EEXISTs to userspace
90
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
91
+ */
92
+ if (r == -1 && errno != EEXIST ) {
93
+ pr_info ("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n" ,
82
94
addr , tid , errno );
83
95
return r ;
84
96
}
@@ -89,8 +101,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
89
101
cont .range .len = demand_paging_size ;
90
102
91
103
r = ioctl (uffd , UFFDIO_CONTINUE , & cont );
92
- if (r == -1 ) {
93
- pr_info ("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n" ,
104
+ /*
105
+ * When multiple vCPU threads fault on a single page and there are
106
+ * multiple readers for the UFFD, at least one of the UFFDIO_CONTINUEs
107
+ * will fail with EEXIST: handle that case without signaling an
108
+ * error.
109
+ *
110
+ * Note that this also suppresses any EEXISTs occurring from,
111
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
112
+ * happens here, but a realistic VMM might potentially maintain
113
+ * some external state to correctly surface EEXISTs to userspace
114
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
115
+ */
116
+ if (r == -1 && errno != EEXIST ) {
117
+ pr_info ("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n" ,
94
118
addr , tid , errno );
95
119
return r ;
96
120
}
@@ -110,7 +134,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
110
134
111
135
struct test_params {
112
136
int uffd_mode ;
137
+ bool single_uffd ;
113
138
useconds_t uffd_delay ;
139
+ int readers_per_uffd ;
114
140
enum vm_mem_backing_src_type src_type ;
115
141
bool partition_vcpu_memory_access ;
116
142
};
@@ -131,11 +157,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
131
157
struct memstress_vcpu_args * vcpu_args ;
132
158
struct test_params * p = arg ;
133
159
struct uffd_desc * * uffd_descs = NULL ;
160
+ uint64_t uffd_region_size ;
134
161
struct timespec start ;
135
162
struct timespec ts_diff ;
136
163
double vcpu_paging_rate ;
137
164
struct kvm_vm * vm ;
138
- int i ;
165
+ int i , num_uffds = 0 ;
139
166
140
167
vm = memstress_create_vm (mode , nr_vcpus , guest_percpu_mem_size , 1 ,
141
168
p -> src_type , p -> partition_vcpu_memory_access );
@@ -148,17 +175,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
148
175
memset (guest_data_prototype , 0xAB , demand_paging_size );
149
176
150
177
if (p -> uffd_mode == UFFDIO_REGISTER_MODE_MINOR ) {
151
- for (i = 0 ; i < nr_vcpus ; i ++ ) {
178
+ num_uffds = p -> single_uffd ? 1 : nr_vcpus ;
179
+ for (i = 0 ; i < num_uffds ; i ++ ) {
152
180
vcpu_args = & memstress_args .vcpu_args [i ];
153
181
prefault_mem (addr_gpa2alias (vm , vcpu_args -> gpa ),
154
182
vcpu_args -> pages * memstress_args .guest_page_size );
155
183
}
156
184
}
157
185
158
186
if (p -> uffd_mode ) {
159
- uffd_descs = malloc (nr_vcpus * sizeof (struct uffd_desc * ));
187
+ num_uffds = p -> single_uffd ? 1 : nr_vcpus ;
188
+ uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds ;
189
+
190
+ uffd_descs = malloc (num_uffds * sizeof (struct uffd_desc * ));
160
191
TEST_ASSERT (uffd_descs , "Memory allocation failed" );
161
- for (i = 0 ; i < nr_vcpus ; i ++ ) {
192
+ for (i = 0 ; i < num_uffds ; i ++ ) {
193
+ struct memstress_vcpu_args * vcpu_args ;
162
194
void * vcpu_hva ;
163
195
164
196
vcpu_args = & memstress_args .vcpu_args [i ];
@@ -171,7 +203,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
171
203
*/
172
204
uffd_descs [i ] = uffd_setup_demand_paging (
173
205
p -> uffd_mode , p -> uffd_delay , vcpu_hva ,
174
- vcpu_args -> pages * memstress_args .guest_page_size ,
206
+ uffd_region_size ,
207
+ p -> readers_per_uffd ,
175
208
& handle_uffd_page_request );
176
209
}
177
210
}
@@ -188,7 +221,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
188
221
189
222
if (p -> uffd_mode ) {
190
223
/* Tell the user fault fd handler threads to quit */
191
- for (i = 0 ; i < nr_vcpus ; i ++ )
224
+ for (i = 0 ; i < num_uffds ; i ++ )
192
225
uffd_stop_demand_paging (uffd_descs [i ]);
193
226
}
194
227
@@ -212,15 +245,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
212
245
static void help (char * name )
213
246
{
214
247
puts ("" );
215
- printf ("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
216
- " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n" , name );
248
+ printf ("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
249
+ " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
250
+ " [-s type] [-v vcpus] [-c cpu_list] [-o]\n" , name );
217
251
guest_modes_help ();
218
252
printf (" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
219
253
" UFFD registration mode: 'MISSING' or 'MINOR'.\n" );
220
254
kvm_print_vcpu_pinning_help ();
255
+ printf (" -a: Use a single userfaultfd for all of guest memory, instead of\n"
256
+ " creating one for each region paged by a unique vCPU\n"
257
+ " Set implicitly with -o, and no effect without -u.\n" );
221
258
printf (" -d: add a delay in usec to the User Fault\n"
222
259
" FD handler to simulate demand paging\n"
223
260
" overheads. Ignored without -u.\n" );
261
+ printf (" -r: Set the number of reader threads per uffd.\n" );
224
262
printf (" -b: specify the size of the memory region which should be\n"
225
263
" demand paged by each vCPU. e.g. 10M or 3G.\n"
226
264
" Default: 1G\n" );
@@ -239,12 +277,14 @@ int main(int argc, char *argv[])
239
277
struct test_params p = {
240
278
.src_type = DEFAULT_VM_MEM_SRC ,
241
279
.partition_vcpu_memory_access = true,
280
+ .readers_per_uffd = 1 ,
281
+ .single_uffd = false,
242
282
};
243
283
int opt ;
244
284
245
285
guest_modes_append_default ();
246
286
247
- while ((opt = getopt (argc , argv , "hm :u:d:b:s:v:c:o " )) != -1 ) {
287
+ while ((opt = getopt (argc , argv , "ahom :u:d:b:s:v:c:r: " )) != -1 ) {
248
288
switch (opt ) {
249
289
case 'm' :
250
290
guest_modes_cmdline (optarg );
@@ -256,6 +296,9 @@ int main(int argc, char *argv[])
256
296
p .uffd_mode = UFFDIO_REGISTER_MODE_MINOR ;
257
297
TEST_ASSERT (p .uffd_mode , "UFFD mode must be 'MISSING' or 'MINOR'." );
258
298
break ;
299
+ case 'a' :
300
+ p .single_uffd = true;
301
+ break ;
259
302
case 'd' :
260
303
p .uffd_delay = strtoul (optarg , NULL , 0 );
261
304
TEST_ASSERT (p .uffd_delay >= 0 , "A negative UFFD delay is not supported." );
@@ -276,6 +319,13 @@ int main(int argc, char *argv[])
276
319
break ;
277
320
case 'o' :
278
321
p .partition_vcpu_memory_access = false;
322
+ p .single_uffd = true;
323
+ break ;
324
+ case 'r' :
325
+ p .readers_per_uffd = atoi (optarg );
326
+ TEST_ASSERT (p .readers_per_uffd >= 1 ,
327
+ "Invalid number of readers per uffd %d: must be >=1" ,
328
+ p .readers_per_uffd );
279
329
break ;
280
330
case 'h' :
281
331
default :
0 commit comments