6
6
clippy:: cast_sign_loss,
7
7
clippy:: undocumented_unsafe_blocks,
8
8
clippy:: ptr_as_ptr,
9
+ clippy:: cast_possible_wrap,
9
10
// Not everything is used by both binaries
10
11
dead_code
11
12
) ]
@@ -17,6 +18,7 @@ use std::ffi::c_void;
17
18
use std:: fs:: File ;
18
19
use std:: io:: { Read , Write } ;
19
20
use std:: num:: NonZero ;
21
+ use std:: os:: fd:: RawFd ;
20
22
use std:: os:: unix:: io:: { AsRawFd , FromRawFd , IntoRawFd } ;
21
23
use std:: os:: unix:: net:: UnixStream ;
22
24
use std:: ptr;
@@ -26,10 +28,47 @@ use std::time::Duration;
26
28
use serde:: { Deserialize , Serialize } ;
27
29
use serde_json:: { Deserializer , StreamDeserializer } ;
28
30
use userfaultfd:: { Error , Event , Uffd } ;
31
+ use vmm_sys_util:: ioctl:: ioctl_with_mut_ref;
32
+ use vmm_sys_util:: ioctl_iowr_nr;
29
33
use vmm_sys_util:: sock_ctrl_msg:: ScmSocket ;
30
34
31
35
use crate :: uffd_utils:: userfault_bitmap:: UserfaultBitmap ;
32
36
37
+ // TODO: remove when UFFDIO_CONTINUE for guest_memfd is available in the crate
38
+ #[ repr( C ) ]
39
+ struct uffdio_continue {
40
+ range : uffdio_range ,
41
+ mode : u64 ,
42
+ mapped : u64 ,
43
+ }
44
+
45
+ ioctl_iowr_nr ! ( UFFDIO_CONTINUE , 0xAA , 0x7 , uffdio_continue) ;
46
+
47
+ #[ repr( C ) ]
48
+ struct uffdio_range {
49
+ start : u64 ,
50
+ len : u64 ,
51
+ }
52
+
53
+ pub fn uffd_continue ( uffd : RawFd , fault_addr : u64 , len : u64 ) -> std:: io:: Result < ( ) > {
54
+ let mut cont = uffdio_continue {
55
+ range : uffdio_range {
56
+ start : fault_addr,
57
+ len,
58
+ } ,
59
+ mode : 0 , // Normal continuation mode
60
+ mapped : 0 ,
61
+ } ;
62
+
63
+ let ret = unsafe { ioctl_with_mut_ref ( & uffd, UFFDIO_CONTINUE ( ) , & mut cont) } ;
64
+
65
+ if ret == -1 {
66
+ return Err ( std:: io:: Error :: last_os_error ( ) ) ;
67
+ }
68
+
69
+ Ok ( ( ) )
70
+ }
71
+
33
72
// This is the same as the one used in src/vmm.
34
73
/// This describes the mapping between Firecracker base virtual address and offset in the
35
74
/// buffer or file backend for a guest memory region. It is used to tell an external
@@ -122,7 +161,7 @@ pub struct UffdHandler {
122
161
pub mem_regions : Vec < GuestRegionUffdMapping > ,
123
162
pub page_size : usize ,
124
163
backing_buffer : * const u8 ,
125
- uffd : Uffd ,
164
+ pub uffd : Uffd ,
126
165
removed_pages : HashSet < u64 > ,
127
166
pub guest_memfd : Option < File > ,
128
167
pub guest_memfd_addr : Option < * mut u8 > ,
@@ -266,6 +305,20 @@ impl UffdHandler {
266
305
}
267
306
}
268
307
308
+ pub fn addr_to_offset ( & self , addr : * mut u8 ) -> u64 {
309
+ let addr = addr as u64 ;
310
+ for region in & self . mem_regions {
311
+ if region. contains ( addr) {
312
+ return addr - region. base_host_virt_addr + region. offset ;
313
+ }
314
+ }
315
+
316
+ panic ! (
317
+ "Could not find addr: {:#x} within guest region mappings." ,
318
+ addr
319
+ ) ;
320
+ }
321
+
269
322
pub fn serve_pf ( & mut self , addr : * mut u8 , len : usize ) -> bool {
270
323
// Find the start of the page that the current faulting address belongs to.
271
324
let dst = ( addr as usize & !( self . page_size - 1 ) ) as * mut libc:: c_void ;
@@ -278,7 +331,7 @@ impl UffdHandler {
278
331
279
332
for region in self . mem_regions . iter ( ) {
280
333
if region. contains ( fault_page_addr) {
281
- return self . populate_from_file ( region, fault_page_addr, len) ;
334
+ return self . populate_from_file ( & region. clone ( ) , fault_page_addr, len) ;
282
335
}
283
336
}
284
337
@@ -292,12 +345,61 @@ impl UffdHandler {
292
345
self . mem_regions . iter ( ) . map ( |r| r. size ) . sum ( )
293
346
}
294
347
295
- fn populate_from_file ( & self , region : & GuestRegionUffdMapping , dst : u64 , len : usize ) -> bool {
296
- let offset = dst - region. base_host_virt_addr ;
297
- let src = self . backing_buffer as u64 + region. offset + offset;
348
+ pub fn populate_via_write ( & mut self , offset : usize , len : usize ) -> usize {
349
+ // man 2 write:
350
+ //
351
+ // On Linux, write() (and similar system calls) will transfer at most
352
+ // 0x7ffff000 (2,147,479,552) bytes, returning the number of bytes
353
+ // actually transferred. (This is true on both 32-bit and 64-bit
354
+ // systems.)
355
+ const MAX_WRITE_LEN : usize = 2_147_479_552 ;
356
+
357
+ assert ! (
358
+ offset. checked_add( len) . unwrap( ) <= self . size( ) ,
359
+ "{} + {} >= {}" ,
360
+ offset,
361
+ len,
362
+ self . size( )
363
+ ) ;
364
+
365
+ let mut total_written = 0 ;
366
+
367
+ while total_written < len {
368
+ let src = unsafe { self . backing_buffer . add ( offset + total_written) } ;
369
+ let len_to_write = ( len - total_written) . min ( MAX_WRITE_LEN ) ;
370
+ let bytes_written = unsafe {
371
+ libc:: pwrite64 (
372
+ self . guest_memfd . as_ref ( ) . unwrap ( ) . as_raw_fd ( ) ,
373
+ src. cast ( ) ,
374
+ len_to_write,
375
+ ( offset + total_written) as libc:: off64_t ,
376
+ )
377
+ } ;
378
+
379
+ let bytes_written = match bytes_written {
380
+ -1 if vmm_sys_util:: errno:: Error :: last ( ) . errno ( ) == libc:: ENOSPC => 0 ,
381
+ written @ 0 .. => written as usize ,
382
+ _ => panic ! ( "{:?}" , std:: io:: Error :: last_os_error( ) ) ,
383
+ } ;
384
+
385
+ self . userfault_bitmap
386
+ . as_mut ( )
387
+ . unwrap ( )
388
+ . reset_addr_range ( offset + total_written, bytes_written) ;
389
+
390
+ total_written += bytes_written;
391
+
392
+ if bytes_written != len_to_write {
393
+ break ;
394
+ }
395
+ }
396
+
397
+ total_written
398
+ }
298
399
400
+ fn populate_via_uffdio_copy ( & self , src : * const u8 , dst : u64 , len : usize ) -> bool {
299
401
unsafe {
300
- match self . uffd . copy ( src as * const _ , dst as * mut _ , len, true ) {
402
+ match self . uffd . copy ( src. cast ( ) , dst as * mut _ , len, true ) {
301
403
// Make sure the UFFD copied some bytes.
302
404
Ok ( value) => assert ! ( value > 0 ) ,
303
405
// Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD
@@ -322,6 +424,42 @@ impl UffdHandler {
322
424
true
323
425
}
324
426
427
+ fn populate_via_memcpy ( & mut self , src : * const u8 , dst : u64 , offset : usize , len : usize ) -> bool {
428
+ let dst_memcpy = unsafe {
429
+ self . guest_memfd_addr
430
+ . expect ( "no guest_memfd addr" )
431
+ . add ( offset)
432
+ } ;
433
+
434
+ unsafe {
435
+ std:: ptr:: copy_nonoverlapping ( src, dst_memcpy, len) ;
436
+ }
437
+
438
+ self . userfault_bitmap
439
+ . as_mut ( )
440
+ . unwrap ( )
441
+ . reset_addr_range ( offset, len) ;
442
+
443
+ uffd_continue ( self . uffd . as_raw_fd ( ) , dst, len as u64 ) . expect ( "uffd_continue" ) ;
444
+
445
+ true
446
+ }
447
+
448
+ fn populate_from_file (
449
+ & mut self ,
450
+ region : & GuestRegionUffdMapping ,
451
+ dst : u64 ,
452
+ len : usize ,
453
+ ) -> bool {
454
+ let offset = ( region. offset + dst - region. base_host_virt_addr ) as usize ;
455
+ let src = unsafe { self . backing_buffer . add ( offset) } ;
456
+
457
+ match self . guest_memfd {
458
+ Some ( _) => self . populate_via_memcpy ( src, dst, offset, len) ,
459
+ None => self . populate_via_uffdio_copy ( src, dst, len) ,
460
+ }
461
+ }
462
+
325
463
fn zero_out ( & mut self , addr : u64 ) -> bool {
326
464
match unsafe { self . uffd . zeropage ( addr as * mut _ , self . page_size , true ) } {
327
465
Ok ( _) => true ,
@@ -614,7 +752,7 @@ mod tests {
614
752
let ( stream, _) = listener. accept ( ) . expect ( "Cannot listen on UDS socket" ) ;
615
753
// Update runtime with actual runtime
616
754
let runtime = uninit_runtime. write ( Runtime :: new ( stream, file) ) ;
617
- runtime. run ( |_: & mut UffdHandler | { } ) ;
755
+ runtime. run ( |_: & mut UffdHandler | { } , |_ : & mut UffdHandler , _ : usize | { } ) ;
618
756
} ) ;
619
757
620
758
// wait for runtime thread to initialize itself
0 commit comments