@@ -9,6 +9,7 @@ use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES};
99use kvm_ioctls:: VmFd ;
1010use vmm_sys_util:: eventfd:: EventFd ;
1111
12+ use crate :: logger:: info;
1213use crate :: vstate:: memory:: { Address , GuestMemory , GuestMemoryMmap , GuestMemoryRegion } ;
1314
1415#[ cfg( target_arch = "x86_64" ) ]
@@ -42,6 +43,42 @@ pub enum VmError {
4243
4344/// Contains Vm functions that are usable across CPU architectures
4445impl Vm {
46+ fn create_vm ( kvm : & crate :: vstate:: kvm:: Kvm ) -> Result < VmFd , VmError > {
47+ // It is known that KVM_CREATE_VM occasionally fails with EINTR on heavily loaded machines
48+ // with many VMs.
49+ //
50+ // The behavior itself that KVM_CREATE_VM can return EINTR is intentional. This is because
51+ // the KVM_CREATE_VM path includes mm_take_all_locks() that is CPU intensive and all CPU
52+ // intensive syscalls should check for pending signals and return EINTR immediately to allow
53+ // userland to remain interactive.
54+ // https://lists.nongnu.org/archive/html/qemu-devel/2014-01/msg01740.html
55+ //
56+ // However, it is empirically confirmed that, even though there is no pending signal,
57+ // KVM_CREATE_VM returns EINTR.
58+ // https://lore.kernel.org/qemu-devel/[email protected] / 59+ //
60+ // To mitigate it, QEMU does an inifinite retry on EINTR that greatly improves reliabiliy:
61+ // - https://github.com/qemu/qemu/commit/94ccff133820552a859c0fb95e33a539e0b90a75
62+ // - https://github.com/qemu/qemu/commit/bbde13cd14ad4eec18529ce0bf5876058464e124
63+ //
64+ // Similarly, we do retries up to 5 times. Although Firecracker clients are also able to
65+ // retry, they have to start Firecracker from scratch. Doing retries in Firecracker makes
66+ // recovery faster and improves reliability.
67+ const MAX_ATTEMPTS : u32 = 5 ;
68+ for attempt in 1 ..=MAX_ATTEMPTS {
69+ match kvm. fd . create_vm ( ) {
70+ Ok ( fd) => return Ok ( fd) ,
71+ Err ( e) if e. errno ( ) == libc:: EINTR && attempt < MAX_ATTEMPTS => {
72+ info ! ( "Attemp #{attempt} of KVM_CREATE_VM returned EINTR" ) ;
73+ // Exponential backoff (1us, 2us, 4us, and 8us => 15us in total)
74+ std:: thread:: sleep ( std:: time:: Duration :: from_micros ( 2u64 . pow ( attempt - 1 ) ) ) ;
75+ }
76+ Err ( e) => return Err ( VmError :: CreateVm ( e) ) ,
77+ }
78+ }
79+ unreachable ! ( ) ;
80+ }
81+
4582 /// Creates the specified number of [`Vcpu`]s.
4683 ///
4784 /// The returned [`EventFd`] is written to whenever any of the vcpus exit.
0 commit comments