Skip to content

Commit 4f7295f

Browse files
authored
Merge branch 'main' into pull_latest_s3_artifacts
2 parents e4c83a8 + 89d3ad8 commit 4f7295f

File tree

19 files changed

+454
-175
lines changed

19 files changed

+454
-175
lines changed

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,24 @@ and this project adheres to
1010

1111
### Added
1212

13+
- [#5510](https://github.com/firecracker-microvm/firecracker/pull/5510),
14+
[#5593](https://github.com/firecracker-microvm/firecracker/pull/5593),
15+
[#5564](https://github.com/firecracker-microvm/firecracker/pull/5564): Add
16+
support for the
17+
[VMClock device](https://uapi-group.org/specifications/specs/vmclock). The
18+
implementation supports the snapshot safety features proposed
19+
[here](https://lore.kernel.org/lkml/20260107132514.437-1-bchalios@amazon.es/),
20+
but currently doesn't provide any clock-specific information for helping the
21+
guest synchronize its clocks. More information can be found in
22+
[docs](docs/snapshotting/snapshot-support.md#userspace-notifications-of-loading-virtual-machine-snapshots).
23+
1324
### Changed
1425

26+
- [#5564](https://github.com/firecracker-microvm/firecracker/pull/5564): which
27+
added support for VMClock, uses one extra GSI for the VMClock device itself
28+
which reduces the available GSIs for VirtIO devices. New maximum values are 92
29+
devices on Aarch64 and 17 devices on x86.
30+
1531
### Deprecated
1632

1733
### Removed

docs/snapshotting/snapshot-support.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
- [Snapshot security and uniqueness](#snapshot-security-and-uniqueness)
2525
- [Secure and insecure usage examples](#usage-examples)
2626
- [Reusing snapshotted states securely](#reusing-snapshotted-states-securely)
27+
- [Userspace notifications of loading Virtual Machine snapshots](#userspace-notifications-of-loading-virtual-machine-snapshots)
2728
- [Vsock device limitation](#vsock-device-limitation)
2829
- [VMGenID device limitation](#vmgenid-device-limitation)
2930
- [Where can I resume my snapshots?](#where-can-i-resume-my-snapshots)
@@ -590,6 +591,41 @@ identifiers, cached random numbers, cryptographic tokens, etc **will** still be
590591
replicated across multiple microVMs resumed from the same snapshot. Users need
591592
to implement mechanisms for ensuring de-duplication of such state, where needed.
592593
594+
## Userspace notifications of loading Virtual Machine snapshots
595+
596+
VMClock device
597+
([specification](https://uapi-group.org/specifications/specs/vmclock/)) is a
598+
device that enables efficient application clock synchronization against real
599+
wallclock time, for applications running inside Virtual Machines. VMClock also
600+
takes care of situations where some sort of disruption happens to the clock.
601+
It handles these through fields in the
602+
[`vmclock_abi`](https://uapi-group.org/specifications/specs/vmclock/#the-vmclock_abi-structure).
603+
Currently, it handles two cases:
604+
605+
1. Live migration through the `disruption_marker` field.
606+
1. Virtual machine snapshots through the `vm_generation_counter`.
607+
608+
Whenever a VM starts from a snapshot, VMClock will present a new (different than
609+
what was previously stored) value in the `vm_generation_counter`. This happens
610+
in an atomic way, i.e. `vm_generation_counter` will include the new value as
611+
soon as vCPUs are resumed post snapshot loading.
612+
613+
Userspace libraries, e.g. userspace PRNGs, can mmap() `vmclock_abi` and monitor
614+
changes in `vm_generation_counter` to observe when they need to adapt and/or
615+
recreate state.
616+
617+
Moreover, VMClock allows processes to call poll() on the VMClock device and get
618+
notified about changes through an event loop.
619+
620+
> [!IMPORTANT] Support for `vm_generation_counter` and `poll()` is implemented
621+
> in Linux through the patches
622+
> [here](https://lore.kernel.org/lkml/20260107132514.437-1-bchalios@amazon.es/).
623+
> We have backported these patches for AL kernels
624+
> [here](../../resources/patches/vmclock) (versions 5.10 and 6.1). Using the
625+
> kernels suggested in the [Getting Started Guide](../getting-started.md)
626+
> includes these patches. When using mainline kernels users need to make sure
627+
> that they apply the linked patches, until these get merged upstream.
628+
593629
## Vsock device reset
594630
595631
The vsock device is reset across snapshot/restore to avoid inconsistent state

src/vmm/src/arch/aarch64/fdt.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use crate::arch::{
2020
use crate::device_manager::DeviceManager;
2121
use crate::device_manager::mmio::MMIODeviceInfo;
2222
use crate::device_manager::pci_mngr::PciDevices;
23+
use crate::devices::acpi::vmclock::{VMCLOCK_SIZE, VmClock};
2324
use crate::devices::acpi::vmgenid::{VMGENID_MEM_SIZE, VmGenId};
2425
use crate::initrd::InitrdConfig;
2526
use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestRegionType};
@@ -97,6 +98,7 @@ pub fn create_fdt(
9798
create_psci_node(&mut fdt_writer)?;
9899
create_devices_node(&mut fdt_writer, device_manager)?;
99100
create_vmgenid_node(&mut fdt_writer, &device_manager.acpi_devices.vmgenid)?;
101+
create_vmclock_node(&mut fdt_writer, &device_manager.acpi_devices.vmclock)?;
100102
create_pci_nodes(&mut fdt_writer, &device_manager.pci_devices)?;
101103

102104
// End Header node.
@@ -287,6 +289,18 @@ fn create_vmgenid_node(fdt: &mut FdtWriter, vmgenid: &VmGenId) -> Result<(), Fdt
287289
Ok(())
288290
}
289291

292+
fn create_vmclock_node(fdt: &mut FdtWriter, vmclock: &VmClock) -> Result<(), FdtError> {
293+
let vmclock_node = fdt.begin_node(&format!("ptp@{}", vmclock.guest_address.0))?;
294+
fdt.property_string("compatible", "amazon,vmclock")?;
295+
fdt.property_array_u64("reg", &[vmclock.guest_address.0, VMCLOCK_SIZE as u64])?;
296+
fdt.property_array_u32(
297+
"interrupts",
298+
&[GIC_FDT_IRQ_TYPE_SPI, vmclock.gsi, IRQ_TYPE_EDGE_RISING],
299+
)?;
300+
fdt.end_node(vmclock_node)?;
301+
Ok(())
302+
}
303+
290304
fn create_gic_node(fdt: &mut FdtWriter, gic_device: &GICDevice) -> Result<(), FdtError> {
291305
let interrupt = fdt.begin_node("intc")?;
292306
fdt.property_string("compatible", gic_device.fdt_compatibility())?;
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

src/vmm/src/arch/x86_64/vcpu.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -746,8 +746,6 @@ impl Peripherals {
746746
}
747747
unexpected_exit => {
748748
METRICS.vcpu.failures.inc();
749-
// TODO: Are we sure we want to finish running a vcpu upon
750-
// receiving a vm exit that is not necessarily an error?
751749
error!("Unexpected exit reason on vcpu run: {:?}", unexpected_exit);
752750
Err(VcpuError::UnhandledKvmExit(format!(
753751
"{:?}",

src/vmm/src/builder.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,6 @@ pub fn build_microvm_for_boot(
288288
)?;
289289

290290
device_manager.attach_vmgenid_device(&vm)?;
291-
#[cfg(target_arch = "x86_64")]
292291
device_manager.attach_vmclock_device(&vm)?;
293292

294293
#[cfg(target_arch = "aarch64")]

src/vmm/src/device_manager/acpi.rs

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
// SPDX-License-Identifier: Apache-2.0
33

4+
#[cfg(target_arch = "x86_64")]
45
use acpi_tables::{Aml, aml};
56
use vm_memory::GuestMemoryError;
67

78
use crate::Vm;
8-
#[cfg(target_arch = "x86_64")]
99
use crate::devices::acpi::vmclock::VmClock;
1010
use crate::devices::acpi::vmgenid::VmGenId;
1111
use crate::vstate::resources::ResourceAllocator;
@@ -23,7 +23,6 @@ pub struct ACPIDeviceManager {
2323
/// VMGenID device
2424
pub vmgenid: VmGenId,
2525
/// VMclock device
26-
#[cfg(target_arch = "x86_64")]
2726
pub vmclock: VmClock,
2827
}
2928

@@ -32,7 +31,6 @@ impl ACPIDeviceManager {
3231
pub fn new(resource_allocator: &mut ResourceAllocator) -> Self {
3332
ACPIDeviceManager {
3433
vmgenid: VmGenId::new(resource_allocator),
35-
#[cfg(target_arch = "x86_64")]
3634
vmclock: VmClock::new(resource_allocator),
3735
}
3836
}
@@ -43,19 +41,19 @@ impl ACPIDeviceManager {
4341
Ok(())
4442
}
4543

46-
#[cfg(target_arch = "x86_64")]
4744
pub fn attach_vmclock(&self, vm: &Vm) -> Result<(), ACPIDeviceError> {
45+
vm.register_irq(&self.vmclock.interrupt_evt, self.vmclock.gsi)?;
4846
self.vmclock.activate(vm.guest_memory())?;
4947
Ok(())
5048
}
5149
}
5250

51+
#[cfg(target_arch = "x86_64")]
5352
impl Aml for ACPIDeviceManager {
5453
fn append_aml_bytes(&self, v: &mut Vec<u8>) -> Result<(), aml::AmlError> {
5554
// AML for [`VmGenId`] device.
5655
self.vmgenid.append_aml_bytes(v)?;
5756
// AML for [`VmClock`] device.
58-
#[cfg(target_arch = "x86_64")]
5957
self.vmclock.append_aml_bytes(v)?;
6058

6159
// Create the AML for the GED interrupt handler
@@ -65,30 +63,37 @@ impl Aml for ACPIDeviceManager {
6563
&aml::Name::new("_HID".try_into()?, &"ACPI0013")?,
6664
&aml::Name::new(
6765
"_CRS".try_into()?,
68-
&aml::ResourceTemplate::new(vec![&aml::Interrupt::new(
69-
true,
70-
true,
71-
false,
72-
false,
73-
self.vmgenid.gsi,
74-
)]),
66+
&aml::ResourceTemplate::new(vec![
67+
&aml::Interrupt::new(true, true, false, false, self.vmgenid.gsi),
68+
&aml::Interrupt::new(true, true, false, false, self.vmclock.gsi),
69+
]),
7570
)?,
71+
// We know that the maximum IRQ number fits in a u8. We have up to
72+
// 32 IRQs in x86 and up to 128 in ARM (look into `vmm::crate::arch::layout::GSI_LEGACY_END`).
73+
// Both `vmgenid.gsi` and `vmclock.gsi` can safely be cast to `u8`
74+
// without truncation, so we let clippy know.
7675
&aml::Method::new(
7776
"_EVT".try_into()?,
7877
1,
7978
true,
80-
vec![&aml::If::new(
81-
// We know that the maximum IRQ number fits in a u8. We have up to
82-
// 32 IRQs in x86 and up to 128 in
83-
// ARM (look into
84-
// `vmm::crate::arch::layout::GSI_LEGACY_END`)
85-
#[allow(clippy::cast_possible_truncation)]
86-
&aml::Equal::new(&aml::Arg(0), &(self.vmgenid.gsi as u8)),
87-
vec![&aml::Notify::new(
88-
&aml::Path::new("\\_SB_.VGEN")?,
89-
&0x80usize,
90-
)],
91-
)],
79+
vec![
80+
&aml::If::new(
81+
#[allow(clippy::cast_possible_truncation)]
82+
&aml::Equal::new(&aml::Arg(0), &(self.vmgenid.gsi as u8)),
83+
vec![&aml::Notify::new(
84+
&aml::Path::new("\\_SB_.VGEN")?,
85+
&0x80usize,
86+
)],
87+
),
88+
&aml::If::new(
89+
#[allow(clippy::cast_possible_truncation)]
90+
&aml::Equal::new(&aml::Arg(0), &(self.vmclock.gsi as u8)),
91+
vec![&aml::Notify::new(
92+
&aml::Path::new("\\_SB_.VCLK")?,
93+
&0x80usize,
94+
)],
95+
),
96+
],
9297
),
9398
],
9499
)

src/vmm/src/device_manager/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,6 @@ impl DeviceManager {
237237
Ok(())
238238
}
239239

240-
#[cfg(target_arch = "x86_64")]
241240
pub(crate) fn attach_vmclock_device(&mut self, vm: &Vm) -> Result<(), AttachDeviceError> {
242241
self.acpi_devices.attach_vmclock(vm)?;
243242
Ok(())
@@ -465,6 +464,9 @@ impl<'a> Persist<'a> for DeviceManager {
465464
// Restore ACPI devices
466465
let mut acpi_devices = ACPIDeviceManager::restore(constructor_args.vm, &state.acpi_state)?;
467466
acpi_devices.vmgenid.notify_guest()?;
467+
acpi_devices
468+
.vmclock
469+
.post_load_update(constructor_args.vm.guest_memory());
468470

469471
// Restore PCI devices
470472
let pci_ctor_args = PciDevicesConstructorArgs {

src/vmm/src/device_manager/persist.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ use super::mmio::*;
1515
#[cfg(target_arch = "aarch64")]
1616
use crate::arch::DeviceType;
1717
use crate::device_manager::acpi::ACPIDeviceError;
18-
#[cfg(target_arch = "x86_64")]
1918
use crate::devices::acpi::vmclock::{VmClock, VmClockState};
2019
use crate::devices::acpi::vmgenid::{VMGenIDState, VmGenId};
2120
#[cfg(target_arch = "aarch64")]
@@ -168,7 +167,6 @@ impl fmt::Debug for MMIODevManagerConstructorArgs<'_> {
168167
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
169168
pub struct ACPIDeviceManagerState {
170169
vmgenid: VMGenIDState,
171-
#[cfg(target_arch = "x86_64")]
172170
vmclock: VmClockState,
173171
}
174172

@@ -180,7 +178,6 @@ impl<'a> Persist<'a> for ACPIDeviceManager {
180178
fn save(&self) -> Self::State {
181179
ACPIDeviceManagerState {
182180
vmgenid: self.vmgenid.save(),
183-
#[cfg(target_arch = "x86_64")]
184181
vmclock: self.vmclock.save(),
185182
}
186183
}
@@ -190,10 +187,14 @@ impl<'a> Persist<'a> for ACPIDeviceManager {
190187
// Safe to unwrap() here, this will never return an error.
191188
vmgenid: VmGenId::restore((), &state.vmgenid).unwrap(),
192189
// Safe to unwrap() here, this will never return an error.
193-
#[cfg(target_arch = "x86_64")]
194-
vmclock: VmClock::restore(vm.guest_memory(), &state.vmclock).unwrap(),
190+
vmclock: VmClock::restore((), &state.vmclock).unwrap(),
195191
};
196192

193+
vm.register_irq(
194+
&acpi_devices.vmclock.interrupt_evt,
195+
acpi_devices.vmclock.gsi,
196+
)?;
197+
197198
acpi_devices.attach_vmgenid(vm)?;
198199
Ok(acpi_devices)
199200
}

0 commit comments

Comments
 (0)