Skip to content

Commit 6d40e44

Browse files
committed
firecracker adjustments for v12
- Drop setting memory attributes to private (workaround was needed to get KVM to fault non-coco VMs through guest_memfd always) - Drop no-kvmclock (we have a workaround patch now) - Drop VM types (guest_memfd is now supported on all vm types). - Update kvm capability numbers Signed-off-by: Patrick Roy <[email protected]>
1 parent a964591 commit 6d40e44

File tree

7 files changed

+42
-83
lines changed

7 files changed

+42
-83
lines changed

src/firecracker/examples/uffd/uffd_utils.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,10 @@ impl UffdHandler {
254254
match (&guest_memfd, &userfault_bitmap_memfd) {
255255
(Some(guestmem_file), Some(bitmap_file)) => {
256256
let guest_memfd_addr =
257-
Some(Self::mmap_helper(size, guestmem_file.as_raw_fd()) as *mut u8);
257+
Some(Self::mmap_helper(size, guestmem_file.as_raw_fd()).cast::<u8>());
258258

259-
let bitmap_ptr = Self::mmap_helper(size, bitmap_file.as_raw_fd()) as *mut AtomicU64;
259+
let bitmap_ptr =
260+
Self::mmap_helper(size, bitmap_file.as_raw_fd()).cast::<AtomicU64>();
260261

261262
// SAFETY: The bitmap pointer is valid and the size is correct.
262263
let userfault_bitmap = Some(unsafe {
@@ -613,7 +614,7 @@ impl Runtime {
613614
) -> UffdHandler {
614615
let mut message_buf = vec![0u8; 1024];
615616
let mut iovecs = [libc::iovec {
616-
iov_base: message_buf.as_mut_ptr() as *mut libc::c_void,
617+
iov_base: message_buf.as_mut_ptr().cast::<libc::c_void>(),
617618
iov_len: message_buf.len(),
618619
}];
619620
let mut fds = [0; 3];

src/vmm/src/arch/aarch64/vm.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,6 @@ use crate::arch::aarch64::gic::GicState;
88
use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryState};
99
use crate::vstate::vm::{VmCommon, VmError};
1010

11-
/// The VM type for this architecture that allows us to use guest_memfd. On ARM, all VMs
12-
/// support guest_memfd and no special type is needed (in fact, no concept of vm types really
13-
/// exists, and the correspoding field of the CREATE_VM ioctl determines IPA size instead,
14-
/// e.g. the size of the guest physical address space. This value cannot be hardcoded, hence
15-
/// `None` to let the `Vm` constructor now that just normal [`Kvm::create_vm`] should be called,
16-
/// which internally determines the preferred IPA size.
17-
pub const VM_TYPE_FOR_SECRET_FREEDOM: Option<u64> = None;
18-
1911
/// Structure representing the current architecture's understand of what a "virtual machine" is.
2012
#[derive(Debug)]
2113
pub struct ArchVm {

src/vmm/src/arch/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pub use aarch64::kvm::{Kvm, KvmArchError, OptionalCapabilities};
1717
#[cfg(target_arch = "aarch64")]
1818
pub use aarch64::vcpu::*;
1919
#[cfg(target_arch = "aarch64")]
20-
pub use aarch64::vm::{ArchVm, ArchVmError, VM_TYPE_FOR_SECRET_FREEDOM, VmState};
20+
pub use aarch64::vm::{ArchVm, ArchVmError, VmState};
2121
#[cfg(target_arch = "aarch64")]
2222
pub use aarch64::{
2323
ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions,
@@ -35,7 +35,7 @@ pub use x86_64::kvm::{Kvm, KvmArchError};
3535
#[cfg(target_arch = "x86_64")]
3636
pub use x86_64::vcpu::*;
3737
#[cfg(target_arch = "x86_64")]
38-
pub use x86_64::vm::{ArchVm, ArchVmError, VM_TYPE_FOR_SECRET_FREEDOM, VmState};
38+
pub use x86_64::vm::{ArchVm, ArchVmError, VmState};
3939

4040
#[cfg(target_arch = "x86_64")]
4141
pub use crate::arch::x86_64::{

src/vmm/src/arch/x86_64/vm.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ use std::fmt;
55

66
use kvm_bindings::{
77
KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE,
8-
KVM_PIT_SPEAKER_DUMMY, KVM_X86_SW_PROTECTED_VM, MsrList, kvm_clock_data, kvm_irqchip,
9-
kvm_pit_config, kvm_pit_state2,
8+
KVM_PIT_SPEAKER_DUMMY, MsrList, kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2,
109
};
1110
use kvm_ioctls::Cap;
1211
use serde::{Deserialize, Serialize};
@@ -47,9 +46,6 @@ pub enum ArchVmError {
4746
SetTssAddress(kvm_ioctls::Error),
4847
}
4948

50-
/// The VM type for this architecture that allows us to use guest_memfd.
51-
pub const VM_TYPE_FOR_SECRET_FREEDOM: Option<u64> = Some(KVM_X86_SW_PROTECTED_VM as u64);
52-
5349
/// Structure representing the current architecture's understand of what a "virtual machine" is.
5450
#[derive(Debug)]
5551
pub struct ArchVm {

src/vmm/src/builder.rs

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::sync::mpsc;
1313
use std::sync::{Arc, Mutex};
1414

1515
use event_manager::{MutEventSubscriber, SubscriberOps};
16+
use kvm_ioctls::Cap;
1617
use libc::EFD_NONBLOCK;
1718
use linux_loader::cmdline::Cmdline as LoaderKernelCmdline;
1819
use utils::time::TimestampUs;
@@ -68,7 +69,7 @@ use crate::vmm_config::snapshot::{LoadSnapshotParams, MemBackendType};
6869
use crate::vstate::kvm::Kvm;
6970
use crate::vstate::memory::{MaybeBounce, create_memfd};
7071
use crate::vstate::vcpu::{Vcpu, VcpuError};
71-
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
72+
use crate::vstate::vm::{GUEST_MEMFD_FLAG_NO_DIRECT_MAP, GUEST_MEMFD_FLAG_SUPPORT_SHARED, Vm};
7273
use crate::{EventManager, Vmm, VmmError, device_manager};
7374

7475
/// Errors associated with starting the instance.
@@ -147,9 +148,15 @@ fn create_vmm_and_vcpus(
147148
instance_info: &InstanceInfo,
148149
event_manager: &mut EventManager,
149150
vcpu_count: u8,
150-
kvm_capabilities: Vec<KvmCapability>,
151+
mut kvm_capabilities: Vec<KvmCapability>,
151152
secret_free: bool,
152153
) -> Result<(Vmm, Vec<Vcpu>), VmmError> {
154+
if secret_free {
155+
kvm_capabilities.push(KvmCapability::Add(Cap::GuestMemfd as u32));
156+
kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GMEM_SHARED_MEM));
157+
kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GMEM_NO_DIRECT_MAP));
158+
}
159+
153160
let kvm = Kvm::new(kvm_capabilities)?;
154161
// Set up Kvm Vm and register memory regions.
155162
// Build custom CPU config if a custom template is provided.
@@ -238,23 +245,21 @@ pub fn build_microvm_for_boot(
238245

239246
let secret_free = vm_resources.machine_config.secret_free;
240247

241-
#[cfg(target_arch = "x86_64")]
242-
if secret_free {
243-
boot_cmdline.insert_str("no-kvmclock")?;
244-
}
245-
246248
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
247249
instance_info,
248250
event_manager,
249251
vm_resources.machine_config.vcpu_count,
250252
cpu_template.kvm_capabilities.clone(),
251-
vm_resources.machine_config.secret_free,
253+
secret_free,
252254
)?;
253255

254256
let guest_memfd = match secret_free {
255257
true => Some(
256258
vmm.vm
257-
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
259+
.create_guest_memfd(
260+
vm_resources.memory_size(),
261+
GUEST_MEMFD_FLAG_SUPPORT_SHARED | GUEST_MEMFD_FLAG_NO_DIRECT_MAP,
262+
)
258263
.map_err(VmmError::Vm)?,
259264
),
260265
false => None,
@@ -268,9 +273,6 @@ pub fn build_microvm_for_boot(
268273
.register_memory_regions(guest_memory, None)
269274
.map_err(VmmError::Vm)?;
270275

271-
#[cfg(target_arch = "x86_64")]
272-
vmm.vm.set_memory_private().map_err(VmmError::Vm)?;
273-
274276
let entry_point = load_kernel(
275277
MaybeBounce::<_, 4096>::new_persistent(
276278
boot_config.kernel_file.try_clone().unwrap(),
@@ -518,6 +520,10 @@ fn memfd_to_slice(memfd: &Option<File>) -> Option<&mut [u8]> {
518520
}
519521
}
520522

523+
const KVM_CAP_GMEM_SHARED_MEM: u32 = 243;
524+
const KVM_CAP_GMEM_NO_DIRECT_MAP: u32 = 244;
525+
const KVM_CAP_USERFAULT: u32 = 245;
526+
521527
/// Builds and starts a microVM based on the provided MicrovmState.
522528
///
523529
/// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another
@@ -531,15 +537,13 @@ pub fn build_microvm_from_snapshot(
531537
params: &LoadSnapshotParams,
532538
vm_resources: &mut VmResources,
533539
) -> Result<Arc<Mutex<Vmm>>, BuildMicrovmFromSnapshotError> {
534-
// TODO: take it from kvm-bindings when userfault support is merged upstream
535-
const KVM_CAP_USERFAULT: u32 = 241;
536-
537540
// Build Vmm.
538541
debug!("event_start: build microvm from snapshot");
539542

540543
let secret_free = vm_resources.machine_config.secret_free;
541544

542545
let mut kvm_capabilities = microvm_state.kvm_state.kvm_cap_modifiers.clone();
546+
543547
if secret_free {
544548
kvm_capabilities.push(KvmCapability::Add(KVM_CAP_USERFAULT));
545549
}
@@ -556,7 +560,10 @@ pub fn build_microvm_from_snapshot(
556560
let guest_memfd = match secret_free {
557561
true => Some(
558562
vmm.vm
559-
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
563+
.create_guest_memfd(
564+
vm_resources.memory_size(),
565+
GUEST_MEMFD_FLAG_SUPPORT_SHARED | GUEST_MEMFD_FLAG_NO_DIRECT_MAP,
566+
)
560567
.map_err(VmmError::Vm)?,
561568
),
562569
false => None,
@@ -622,9 +629,6 @@ pub fn build_microvm_from_snapshot(
622629
vmm.uffd = uffd;
623630
vmm.uffd_socket = socket;
624631

625-
#[cfg(target_arch = "x86_64")]
626-
vmm.vm.set_memory_private().map_err(VmmError::Vm)?;
627-
628632
#[cfg(target_arch = "x86_64")]
629633
{
630634
// Scale TSC to match, extract the TSC freq from the state if specified

src/vmm/src/vstate/vcpu.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,10 @@ impl Vcpu {
282282
// does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html .
283283
// We do not want to fail if the call is not successful, because depending
284284
// that may be acceptable depending on the workload.
285-
// TODO: once kvmclock is supported with Secret Fredom, remove this condition.
286285
#[cfg(target_arch = "x86_64")]
287-
if self.userfault_resolved.is_none() {
288-
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
289-
METRICS.vcpu.kvmclock_ctrl_fails.inc();
290-
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
291-
}
286+
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
287+
METRICS.vcpu.kvmclock_ctrl_fails.inc();
288+
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
292289
}
293290

294291
return StateMachine::next(Self::paused);
@@ -314,13 +311,10 @@ impl Vcpu {
314311
// does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html .
315312
// We do not want to fail if the call is not successful, because depending
316313
// that may be acceptable depending on the workload.
317-
// TODO: once kvmclock is supported with Secret Fredom, remove this condition.
318314
#[cfg(target_arch = "x86_64")]
319-
if self.userfault_resolved.is_none() {
320-
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
321-
METRICS.vcpu.kvmclock_ctrl_fails.inc();
322-
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
323-
}
315+
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
316+
METRICS.vcpu.kvmclock_ctrl_fails.inc();
317+
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
324318
}
325319

326320
// Move to 'paused' state.

src/vmm/src/vstate/vm.rs

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,16 @@ use std::path::Path;
1212
use std::sync::{Arc, Condvar, Mutex};
1313

1414
use kvm_bindings::{
15-
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVMIO,
16-
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region,
15+
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVMIO, kvm_create_guest_memfd,
16+
kvm_userspace_memory_region,
1717
};
1818
use kvm_ioctls::{Cap, VmFd};
1919
use vmm_sys_util::eventfd::EventFd;
2020
use vmm_sys_util::ioctl::ioctl_with_ref;
2121
use vmm_sys_util::ioctl_iow_nr;
2222

23+
use crate::arch::host_page_size;
2324
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
24-
use crate::arch::{VM_TYPE_FOR_SECRET_FREEDOM, host_page_size};
2525
use crate::logger::info;
2626
use crate::persist::CreateSnapshotError;
2727
use crate::utils::u64_to_usize;
@@ -33,7 +33,8 @@ use crate::vstate::memory::{
3333
use crate::vstate::vcpu::VcpuError;
3434
use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3535

36-
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
36+
pub(crate) const GUEST_MEMFD_FLAG_SUPPORT_SHARED: u64 = 1 << 0;
37+
pub(crate) const GUEST_MEMFD_FLAG_NO_DIRECT_MAP: u64 = 1 << 1;
3738

3839
/// KVM userfault information
3940
#[derive(Copy, Clone, Default, Eq, PartialEq, Debug)]
@@ -140,14 +141,7 @@ impl Vm {
140141
const MAX_ATTEMPTS: u32 = 5;
141142
let mut attempt = 1;
142143
let fd = loop {
143-
let create_result = if secret_free && VM_TYPE_FOR_SECRET_FREEDOM.is_some() {
144-
kvm.fd
145-
.create_vm_with_type(VM_TYPE_FOR_SECRET_FREEDOM.unwrap())
146-
} else {
147-
kvm.fd.create_vm()
148-
};
149-
150-
match create_result {
144+
match kvm.fd.create_vm() {
151145
Ok(fd) => break fd,
152146
Err(e) if e.errno() == libc::EINTR && attempt < MAX_ATTEMPTS => {
153147
info!("Attempt #{attempt} of KVM_CREATE_VM returned EINTR");
@@ -374,28 +368,6 @@ impl Vm {
374368
&self.common.guest_memory
375369
}
376370

377-
/// Sets the memory attributes on all guest_memfd-backed regions to private
378-
pub fn set_memory_private(&self) -> Result<(), VmError> {
379-
if !self.secret_free() {
380-
return Ok(());
381-
}
382-
383-
for region in self.guest_memory().iter() {
384-
let attr = kvm_memory_attributes {
385-
address: region.start_addr().0,
386-
size: region.len(),
387-
attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64,
388-
..Default::default()
389-
};
390-
391-
self.fd()
392-
.set_memory_attributes(attr)
393-
.map_err(VmError::SetMemoryAttributes)?
394-
}
395-
396-
Ok(())
397-
}
398-
399371
/// Resets the KVM dirty bitmap for each of the guest's memory regions.
400372
pub fn reset_dirty_bitmap(&self) {
401373
self.guest_memory()

0 commit comments

Comments
 (0)