@@ -21,10 +21,47 @@ use std::time::Duration;
21
21
use serde:: { Deserialize , Serialize } ;
22
22
use serde_json:: { Deserializer , StreamDeserializer } ;
23
23
use userfaultfd:: { Error , Event , Uffd } ;
24
+ use vmm_sys_util:: ioctl:: ioctl_with_mut_ref;
24
25
use vmm_sys_util:: sock_ctrl_msg:: ScmSocket ;
26
+ use vmm_sys_util:: { ioctl_ioc_nr, ioctl_iowr_nr} ;
25
27
26
28
use crate :: uffd_utils:: userfault_bitmap:: UserfaultBitmap ;
27
29
30
+ // TODO: remove when UFFDIO_CONTINUE for guest_memfd is available in the crate
31
+ #[ repr( C ) ]
32
+ struct uffdio_continue {
33
+ range : uffdio_range ,
34
+ mode : u64 ,
35
+ mapped : u64 ,
36
+ }
37
+
38
+ ioctl_iowr_nr ! ( UFFDIO_CONTINUE , 0xAA , 0x7 , uffdio_continue) ;
39
+
40
/// Address range embedded in userfaultfd ioctl argument blocks.
///
/// `#[repr(C)]` gives the struct a C-compatible layout: two consecutive 64-bit fields.
#[repr(C)]
struct uffdio_range {
    /// First address of the range.
    start: u64,
    /// Length of the range in bytes.
    len: u64,
}
45
+
46
+ pub fn uffd_continue ( uffd : RawFd , fault_addr : u64 , len : u64 ) -> std:: io:: Result < ( ) > {
47
+ let mut cont = uffdio_continue {
48
+ range : uffdio_range {
49
+ start : fault_addr,
50
+ len,
51
+ } ,
52
+ mode : 0 , // Normal continuation mode
53
+ mapped : 0 ,
54
+ } ;
55
+
56
+ let ret = unsafe { ioctl_with_mut_ref ( & uffd, UFFDIO_CONTINUE ( ) , & mut cont) } ;
57
+
58
+ if ret == -1 {
59
+ return Err ( std:: io:: Error :: last_os_error ( ) ) ;
60
+ }
61
+
62
+ Ok ( ( ) )
63
+ }
64
+
28
65
// This is the same as the one used in src/vmm.
29
66
/// This describes the mapping between Firecracker base virtual address and offset in the
30
67
/// buffer or file backend for a guest memory region. It is used to tell an external
@@ -117,7 +154,7 @@ pub struct UffdHandler {
117
154
pub mem_regions : Vec < GuestRegionUffdMapping > ,
118
155
pub page_size : usize ,
119
156
backing_buffer : * const u8 ,
120
- uffd : Uffd ,
157
+ pub uffd : Uffd ,
121
158
removed_pages : HashSet < u64 > ,
122
159
pub guest_memfd : Option < File > ,
123
160
pub guest_memfd_addr : Option < * mut u8 > ,
@@ -261,6 +298,20 @@ impl UffdHandler {
261
298
}
262
299
}
263
300
301
+ pub fn addr_to_offset ( & self , addr : * mut u8 ) -> u64 {
302
+ let addr = addr as u64 ;
303
+ for region in & self . mem_regions {
304
+ if region. contains ( addr) {
305
+ return addr - region. base_host_virt_addr + region. offset as u64 ;
306
+ }
307
+ }
308
+
309
+ panic ! (
310
+ "Could not find addr: {:#x} within guest region mappings." ,
311
+ addr
312
+ ) ;
313
+ }
314
+
264
315
pub fn serve_pf ( & mut self , addr : * mut u8 , len : usize ) -> bool {
265
316
// Find the start of the page that the current faulting address belongs to.
266
317
let dst = ( addr as usize & !( self . page_size - 1 ) ) as * mut libc:: c_void ;
@@ -273,7 +324,7 @@ impl UffdHandler {
273
324
} else {
274
325
for region in self . mem_regions . iter ( ) {
275
326
if region. contains ( fault_page_addr) {
276
- return self . populate_from_file ( region, fault_page_addr, len) ;
327
+ return self . populate_from_file ( & region. clone ( ) , fault_page_addr, len) ;
277
328
}
278
329
}
279
330
}
@@ -288,12 +339,61 @@ impl UffdHandler {
288
339
self . mem_regions . iter ( ) . map ( |r| r. size ) . sum ( )
289
340
}
290
341
291
- fn populate_from_file ( & self , region : & GuestRegionUffdMapping , dst : u64 , len : usize ) -> bool {
292
- let offset = dst - region. base_host_virt_addr ;
293
- let src = self . backing_buffer as u64 + region. offset + offset;
342
+ pub fn populate_via_write ( & mut self , offset : usize , len : usize ) -> usize {
343
+ // man 2 write:
344
+ //
345
+ // On Linux, write() (and similar system calls) will transfer at most
346
+ // 0x7ffff000 (2,147,479,552) bytes, returning the number of bytes
347
+ // actually transferred. (This is true on both 32-bit and 64-bit
348
+ // systems.)
349
+ const MAX_WRITE_LEN : usize = 2_147_479_552 ;
350
+
351
+ assert ! (
352
+ offset. checked_add( len) . unwrap( ) <= self . size( ) ,
353
+ "{} + {} >= {}" ,
354
+ offset,
355
+ len,
356
+ self . size( )
357
+ ) ;
358
+
359
+ let mut total_written = 0 ;
360
+
361
+ while total_written < len {
362
+ let src = unsafe { self . backing_buffer . add ( offset + total_written) } ;
363
+ let len_to_write = ( len - total_written) . min ( MAX_WRITE_LEN ) ;
364
+ let bytes_written = unsafe {
365
+ libc:: pwrite64 (
366
+ self . guest_memfd . as_ref ( ) . unwrap ( ) . as_raw_fd ( ) ,
367
+ src. cast ( ) ,
368
+ len_to_write,
369
+ ( offset + total_written) as libc:: off64_t ,
370
+ )
371
+ } ;
372
+
373
+ let bytes_written = match bytes_written {
374
+ -1 if vmm_sys_util:: errno:: Error :: last ( ) . errno ( ) == libc:: ENOSPC => 0 ,
375
+ written @ 0 .. => written as usize ,
376
+ _ => panic ! ( "{:?}" , std:: io:: Error :: last_os_error( ) ) ,
377
+ } ;
378
+
379
+ self . userfault_bitmap
380
+ . as_mut ( )
381
+ . unwrap ( )
382
+ . reset_addr_range ( offset + total_written, bytes_written) ;
383
+
384
+ total_written += bytes_written;
385
+
386
+ if bytes_written != len_to_write {
387
+ break ;
388
+ }
389
+ }
390
+
391
+ total_written
392
+ }
294
393
394
+ fn populate_via_uffdio_copy ( & self , src : * const u8 , dst : u64 , len : usize ) -> bool {
295
395
unsafe {
296
- match self . uffd . copy ( src as * const _ , dst as * mut _ , len, true ) {
396
+ match self . uffd . copy ( src. cast ( ) , dst as * mut _ , len, true ) {
297
397
// Make sure the UFFD copied some bytes.
298
398
Ok ( value) => assert ! ( value > 0 ) ,
299
399
// Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD
@@ -318,6 +418,42 @@ impl UffdHandler {
318
418
true
319
419
}
320
420
421
+ fn populate_via_memcpy ( & mut self , src : * const u8 , dst : u64 , offset : usize , len : usize ) -> bool {
422
+ let dst_memcpy = unsafe {
423
+ self . guest_memfd_addr
424
+ . expect ( "no guest_memfd addr" )
425
+ . add ( offset)
426
+ } ;
427
+
428
+ unsafe {
429
+ std:: ptr:: copy_nonoverlapping ( src, dst_memcpy, len) ;
430
+ }
431
+
432
+ self . userfault_bitmap
433
+ . as_mut ( )
434
+ . unwrap ( )
435
+ . reset_addr_range ( offset, len) ;
436
+
437
+ uffd_continue ( self . uffd . as_raw_fd ( ) , dst, len as u64 ) . expect ( "uffd_continue" ) ;
438
+
439
+ true
440
+ }
441
+
442
+ fn populate_from_file (
443
+ & mut self ,
444
+ region : & GuestRegionUffdMapping ,
445
+ dst : u64 ,
446
+ len : usize ,
447
+ ) -> bool {
448
+ let offset = ( region. offset + dst - region. base_host_virt_addr ) as usize ;
449
+ let src = unsafe { self . backing_buffer . add ( offset) } ;
450
+
451
+ match self . guest_memfd {
452
+ Some ( _) => self . populate_via_memcpy ( src, dst, offset, len) ,
453
+ None => self . populate_via_uffdio_copy ( src, dst, len) ,
454
+ }
455
+ }
456
+
321
457
fn zero_out ( & mut self , addr : u64 ) {
322
458
let ret = unsafe {
323
459
self . uffd
@@ -619,7 +755,7 @@ mod tests {
619
755
let ( stream, _) = listener. accept ( ) . expect ( "Cannot listen on UDS socket" ) ;
620
756
// Update runtime with actual runtime
621
757
let runtime = uninit_runtime. write ( Runtime :: new ( stream, file) ) ;
622
- runtime. run ( |_: & mut UffdHandler | { } ) ;
758
+ runtime. run ( |_: & mut UffdHandler | { } , |_ : & mut UffdHandler , _ : usize | { } ) ;
623
759
} ) ;
624
760
625
761
// wait for runtime thread to initialize itself
0 commit comments