Skip to content

Commit b5733ae

Browse files
committed
firecracker adjustments for v12
- Drop setting memory attributes to private (workaround was needed to get KVM to fault non-coco VMs through guest_memfd always) - Drop no-kvmclock (we have a workaround patch now) - Drop VM types (guest_memfd is now supported on all vm types). - Update kvm capability numbers Signed-off-by: Patrick Roy <[email protected]>
1 parent 26b2922 commit b5733ae

File tree

9 files changed

+47
-90
lines changed

9 files changed

+47
-90
lines changed

src/firecracker/examples/uffd/fault_all_handler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ fn main() {
5050
let are_we_faulted_yet = uffd_handler
5151
.userfault_bitmap
5252
.as_mut()
53-
.map_or(false, |bitmap| !bitmap.is_bit_set(bit));
53+
.is_some_and(|bitmap| !bitmap.is_bit_set(bit));
5454

5555
if are_we_faulted_yet {
5656
// TODO: we currently ignore the result as we may attempt to

src/firecracker/examples/uffd/on_demand_handler.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,8 @@ fn main() {
119119
println!("uffdio_continue error: {:?}", err)
120120
});
121121
}
122-
} else {
123-
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
124-
deferred_events.push(event);
125-
}
122+
} else if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
123+
deferred_events.push(event);
126124
}
127125
}
128126
userfaultfd::Event::Remove { start, end } => {

src/firecracker/examples/uffd/uffd_utils.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,10 @@ impl UffdHandler {
247247
match (&guest_memfd, &userfault_bitmap_memfd) {
248248
(Some(guestmem_file), Some(bitmap_file)) => {
249249
let guest_memfd_addr =
250-
Some(Self::mmap_helper(size, guestmem_file.as_raw_fd()) as *mut u8);
250+
Some(Self::mmap_helper(size, guestmem_file.as_raw_fd()).cast::<u8>());
251251

252-
let bitmap_ptr = Self::mmap_helper(size, bitmap_file.as_raw_fd()) as *mut AtomicU64;
252+
let bitmap_ptr =
253+
Self::mmap_helper(size, bitmap_file.as_raw_fd()).cast::<AtomicU64>();
253254

254255
// SAFETY: The bitmap pointer is valid and the size is correct.
255256
let userfault_bitmap = Some(unsafe {
@@ -302,7 +303,7 @@ impl UffdHandler {
302303
let addr = addr as u64;
303304
for region in &self.mem_regions {
304305
if region.contains(addr) {
305-
return addr - region.base_host_virt_addr + region.offset as u64;
306+
return addr - region.base_host_virt_addr + region.offset;
306307
}
307308
}
308309

@@ -606,7 +607,7 @@ impl Runtime {
606607
) -> UffdHandler {
607608
let mut message_buf = vec![0u8; 1024];
608609
let mut iovecs = [libc::iovec {
609-
iov_base: message_buf.as_mut_ptr() as *mut libc::c_void,
610+
iov_base: message_buf.as_mut_ptr().cast::<libc::c_void>(),
610611
iov_len: message_buf.len(),
611612
}];
612613
let mut fds = [0; 3];
@@ -686,7 +687,7 @@ impl Runtime {
686687
if pollfds[i].revents & libc::POLLIN != 0 {
687688
nready -= 1;
688689
if pollfds[i].fd == self.stream.as_raw_fd() {
689-
while let Some(fault_request) = uffd_msg_iter.next() {
690+
for fault_request in uffd_msg_iter.by_ref() {
690691
let page_size = self.handler.page_size;
691692

692693
assert!(

src/vmm/src/arch/aarch64/vm.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,6 @@ use crate::arch::aarch64::gic::GicState;
88
use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryState};
99
use crate::vstate::vm::{VmCommon, VmError};
1010

11-
/// The VM type for this architecture that allows us to use guest_memfd. On ARM, all VMs
12-
/// support guest_memfd and no special type is needed (in fact, no concept of vm types really
13-
/// exists, and the correspoding field of the CREATE_VM ioctl determines IPA size instead,
14-
/// e.g. the size of the guest physical address space. This value cannot be hardcoded, hence
15-
/// `None` to let the `Vm` constructor now that just normal [`Kvm::create_vm`] should be called,
16-
/// which internally determines the preferred IPA size.
17-
pub const VM_TYPE_FOR_SECRET_FREEDOM: Option<u64> = None;
18-
1911
/// Structure representing the current architecture's understand of what a "virtual machine" is.
2012
#[derive(Debug)]
2113
pub struct ArchVm {

src/vmm/src/arch/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pub use aarch64::kvm::{Kvm, KvmArchError, OptionalCapabilities};
1717
#[cfg(target_arch = "aarch64")]
1818
pub use aarch64::vcpu::*;
1919
#[cfg(target_arch = "aarch64")]
20-
pub use aarch64::vm::{ArchVm, ArchVmError, VM_TYPE_FOR_SECRET_FREEDOM, VmState};
20+
pub use aarch64::vm::{ArchVm, ArchVmError, VmState};
2121
#[cfg(target_arch = "aarch64")]
2222
pub use aarch64::{
2323
ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions,
@@ -35,7 +35,7 @@ pub use x86_64::kvm::{Kvm, KvmArchError};
3535
#[cfg(target_arch = "x86_64")]
3636
pub use x86_64::vcpu::*;
3737
#[cfg(target_arch = "x86_64")]
38-
pub use x86_64::vm::{ArchVm, ArchVmError, VM_TYPE_FOR_SECRET_FREEDOM, VmState};
38+
pub use x86_64::vm::{ArchVm, ArchVmError, VmState};
3939

4040
#[cfg(target_arch = "x86_64")]
4141
pub use crate::arch::x86_64::{

src/vmm/src/arch/x86_64/vm.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ use std::fmt;
55

66
use kvm_bindings::{
77
KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE,
8-
KVM_PIT_SPEAKER_DUMMY, KVM_X86_SW_PROTECTED_VM, MsrList, kvm_clock_data, kvm_irqchip,
9-
kvm_pit_config, kvm_pit_state2,
8+
KVM_PIT_SPEAKER_DUMMY, MsrList, kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2,
109
};
1110
use kvm_ioctls::Cap;
1211
use serde::{Deserialize, Serialize};
@@ -47,9 +46,6 @@ pub enum ArchVmError {
4746
SetTssAddress(kvm_ioctls::Error),
4847
}
4948

50-
/// The VM type for this architecture that allows us to use guest_memfd.
51-
pub const VM_TYPE_FOR_SECRET_FREEDOM: Option<u64> = Some(KVM_X86_SW_PROTECTED_VM as u64);
52-
5349
/// Structure representing the current architecture's understand of what a "virtual machine" is.
5450
#[derive(Debug)]
5551
pub struct ArchVm {

src/vmm/src/builder.rs

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::sync::mpsc;
1313
use std::sync::{Arc, Mutex};
1414

1515
use event_manager::{MutEventSubscriber, SubscriberOps};
16+
use kvm_ioctls::Cap;
1617
use libc::EFD_NONBLOCK;
1718
use linux_loader::cmdline::Cmdline as LoaderKernelCmdline;
1819
use utils::time::TimestampUs;
@@ -68,7 +69,7 @@ use crate::vmm_config::snapshot::{LoadSnapshotParams, MemBackendType};
6869
use crate::vstate::kvm::Kvm;
6970
use crate::vstate::memory::{MaybeBounce, create_memfd};
7071
use crate::vstate::vcpu::{Vcpu, VcpuError};
71-
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
72+
use crate::vstate::vm::{GUEST_MEMFD_FLAG_NO_DIRECT_MAP, GUEST_MEMFD_FLAG_SUPPORT_SHARED, Vm};
7273
use crate::{EventManager, Vmm, VmmError, device_manager};
7374

7475
/// Errors associated with starting the instance.
@@ -147,9 +148,15 @@ fn create_vmm_and_vcpus(
147148
instance_info: &InstanceInfo,
148149
event_manager: &mut EventManager,
149150
vcpu_count: u8,
150-
kvm_capabilities: Vec<KvmCapability>,
151+
mut kvm_capabilities: Vec<KvmCapability>,
151152
secret_free: bool,
152153
) -> Result<(Vmm, Vec<Vcpu>), VmmError> {
154+
if secret_free {
155+
kvm_capabilities.push(KvmCapability::Add(Cap::GuestMemfd as u32));
156+
kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GMEM_SHARED_MEM));
157+
kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GMEM_NO_DIRECT_MAP));
158+
}
159+
153160
let kvm = Kvm::new(kvm_capabilities)?;
154161
// Set up Kvm Vm and register memory regions.
155162
// Build custom CPU config if a custom template is provided.
@@ -238,23 +245,21 @@ pub fn build_microvm_for_boot(
238245

239246
let secret_free = vm_resources.machine_config.secret_free;
240247

241-
#[cfg(target_arch = "x86_64")]
242-
if secret_free {
243-
boot_cmdline.insert_str("no-kvmclock")?;
244-
}
245-
246248
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
247249
instance_info,
248250
event_manager,
249251
vm_resources.machine_config.vcpu_count,
250252
cpu_template.kvm_capabilities.clone(),
251-
vm_resources.machine_config.secret_free,
253+
secret_free,
252254
)?;
253255

254256
let guest_memfd = match secret_free {
255257
true => Some(
256258
vmm.vm
257-
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
259+
.create_guest_memfd(
260+
vm_resources.memory_size(),
261+
GUEST_MEMFD_FLAG_SUPPORT_SHARED | GUEST_MEMFD_FLAG_NO_DIRECT_MAP,
262+
)
258263
.map_err(VmmError::Vm)?,
259264
),
260265
false => None,
@@ -268,9 +273,6 @@ pub fn build_microvm_for_boot(
268273
.register_memory_regions(guest_memory, None)
269274
.map_err(VmmError::Vm)?;
270275

271-
#[cfg(target_arch = "x86_64")]
272-
vmm.vm.set_memory_private().map_err(VmmError::Vm)?;
273-
274276
let entry_point = load_kernel(
275277
MaybeBounce::<_, 4096>::new_persistent(
276278
boot_config.kernel_file.try_clone().unwrap(),
@@ -518,6 +520,10 @@ fn memfd_to_slice(memfd: &Option<File>) -> Option<&mut [u8]> {
518520
}
519521
}
520522

523+
const KVM_CAP_GMEM_SHARED_MEM: u32 = 243;
524+
const KVM_CAP_GMEM_NO_DIRECT_MAP: u32 = 244;
525+
const KVM_CAP_USERFAULT: u32 = 245;
526+
521527
/// Builds and starts a microVM based on the provided MicrovmState.
522528
///
523529
/// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another
@@ -531,15 +537,13 @@ pub fn build_microvm_from_snapshot(
531537
params: &LoadSnapshotParams,
532538
vm_resources: &mut VmResources,
533539
) -> Result<Arc<Mutex<Vmm>>, BuildMicrovmFromSnapshotError> {
534-
// TODO: take it from kvm-bindings when userfault support is merged upstream
535-
const KVM_CAP_USERFAULT: u32 = 241;
536-
537540
// Build Vmm.
538541
debug!("event_start: build microvm from snapshot");
539542

540543
let secret_free = vm_resources.machine_config.secret_free;
541544

542545
let mut kvm_capabilities = microvm_state.kvm_state.kvm_cap_modifiers.clone();
546+
543547
if secret_free {
544548
kvm_capabilities.push(KvmCapability::Add(KVM_CAP_USERFAULT));
545549
}
@@ -556,7 +560,10 @@ pub fn build_microvm_from_snapshot(
556560
let guest_memfd = match secret_free {
557561
true => Some(
558562
vmm.vm
559-
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
563+
.create_guest_memfd(
564+
vm_resources.memory_size(),
565+
GUEST_MEMFD_FLAG_SUPPORT_SHARED | GUEST_MEMFD_FLAG_NO_DIRECT_MAP,
566+
)
560567
.map_err(VmmError::Vm)?,
561568
),
562569
false => None,
@@ -622,9 +629,6 @@ pub fn build_microvm_from_snapshot(
622629
vmm.uffd = uffd;
623630
vmm.uffd_socket = socket;
624631

625-
#[cfg(target_arch = "x86_64")]
626-
vmm.vm.set_memory_private().map_err(VmmError::Vm)?;
627-
628632
#[cfg(target_arch = "x86_64")]
629633
{
630634
// Scale TSC to match, extract the TSC freq from the state if specified

src/vmm/src/vstate/vcpu.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,10 @@ impl Vcpu {
328328
// does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html .
329329
// We do not want to fail if the call is not successful, because depending
330330
// that may be acceptable depending on the workload.
331-
// TODO: once kvmclock is supported with Secret Fredom, remove this condition.
332331
#[cfg(target_arch = "x86_64")]
333-
if self.userfault_resolved.is_none() {
334-
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
335-
METRICS.vcpu.kvmclock_ctrl_fails.inc();
336-
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
337-
}
332+
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
333+
METRICS.vcpu.kvmclock_ctrl_fails.inc();
334+
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
338335
}
339336

340337
return StateMachine::next(Self::paused);
@@ -360,13 +357,10 @@ impl Vcpu {
360357
// does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html .
361358
// We do not want to fail if the call is not successful, because depending
362359
// that may be acceptable depending on the workload.
363-
// TODO: once kvmclock is supported with Secret Fredom, remove this condition.
364360
#[cfg(target_arch = "x86_64")]
365-
if self.userfault_resolved.is_none() {
366-
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
367-
METRICS.vcpu.kvmclock_ctrl_fails.inc();
368-
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
369-
}
361+
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
362+
METRICS.vcpu.kvmclock_ctrl_fails.inc();
363+
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
370364
}
371365

372366
// Move to 'paused' state.

src/vmm/src/vstate/vm.rs

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ use std::path::Path;
1313
use std::sync::{Arc, Condvar, Mutex};
1414

1515
use kvm_bindings::{
16-
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVMIO,
17-
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region,
16+
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVMIO, kvm_create_guest_memfd,
17+
kvm_userspace_memory_region,
1818
};
1919
use kvm_ioctls::{Cap, VmFd};
2020
use vmm_sys_util::eventfd::EventFd;
2121
use vmm_sys_util::ioctl::ioctl_with_ref;
2222
use vmm_sys_util::ioctl_iow_nr;
2323

24+
use crate::arch::host_page_size;
2425
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
25-
use crate::arch::{VM_TYPE_FOR_SECRET_FREEDOM, host_page_size};
2626
use crate::logger::info;
2727
use crate::persist::CreateSnapshotError;
2828
use crate::utils::u64_to_usize;
@@ -34,7 +34,8 @@ use crate::vstate::memory::{
3434
use crate::vstate::vcpu::VcpuError;
3535
use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3636

37-
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
37+
pub(crate) const GUEST_MEMFD_FLAG_SUPPORT_SHARED: u64 = 1 << 0;
38+
pub(crate) const GUEST_MEMFD_FLAG_NO_DIRECT_MAP: u64 = 1 << 1;
3839

3940
/// KVM userfault information
4041
#[derive(Copy, Clone, Default, Eq, PartialEq, Debug)]
@@ -137,14 +138,7 @@ impl Vm {
137138
const MAX_ATTEMPTS: u32 = 5;
138139
let mut attempt = 1;
139140
let fd = loop {
140-
let create_result = if secret_free && VM_TYPE_FOR_SECRET_FREEDOM.is_some() {
141-
kvm.fd
142-
.create_vm_with_type(VM_TYPE_FOR_SECRET_FREEDOM.unwrap())
143-
} else {
144-
kvm.fd.create_vm()
145-
};
146-
147-
match create_result {
141+
match kvm.fd.create_vm() {
148142
Ok(fd) => break fd,
149143
Err(e) if e.errno() == libc::EINTR && attempt < MAX_ATTEMPTS => {
150144
info!("Attempt #{attempt} of KVM_CREATE_VM returned EINTR");
@@ -371,28 +365,6 @@ impl Vm {
371365
&self.common.guest_memory
372366
}
373367

374-
/// Sets the memory attributes on all guest_memfd-backed regions to private
375-
pub fn set_memory_private(&self) -> Result<(), VmError> {
376-
if !self.secret_free() {
377-
return Ok(());
378-
}
379-
380-
for region in self.guest_memory().iter() {
381-
let attr = kvm_memory_attributes {
382-
address: region.start_addr().0,
383-
size: region.len(),
384-
attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64,
385-
..Default::default()
386-
};
387-
388-
self.fd()
389-
.set_memory_attributes(attr)
390-
.map_err(VmError::SetMemoryAttributes)?
391-
}
392-
393-
Ok(())
394-
}
395-
396368
/// Resets the KVM dirty bitmap for each of the guest's memory regions.
397369
pub fn reset_dirty_bitmap(&self) {
398370
self.guest_memory()

0 commit comments

Comments
 (0)