Skip to content

Commit baa2ecd

Browse files
committed
feat(vmm): Change XSAVE type from kvm_xsave to Xsave in snapshot
Intel AMX is an XSTATE feature and TILEDATA is disabled by default because it requires a larger area to save its state than the traditional 4096 bytes. Instead, the Linux kernel allows VMMs to request the guest permission via `arch_prctl()`. As such, the size of the XSTATE buffer required to save XSTATE is dynamic. To support a dynamically-sized buffer, `KVM_CAP_XSAVE2` was introduced with `KVM_GET_XSAVE2`. Accordingly, kvm-bindings added `Xsave` that is an alias of `FamStructWrapper` for the `kvm_xsave` struct with FAM in the end, and kvm-ioctls added `get_xsave2()` for `KVM_GET_XSAVE2` and `set_xsave2()` to take `Xsave` to call `KVM_SET_XSAVE`. Change the type of `xsave` in `VcpuState` from `kvm_xsave` to `Xsave`. Use `get_xsave2()` and `set_xsave2()`. Signed-off-by: Takahiro Itazuri <[email protected]>
1 parent 1261452 commit baa2ecd

File tree

4 files changed

+84
-17
lines changed

4 files changed

+84
-17
lines changed

Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/vmm/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ displaydoc = "0.2.5"
2222
event-manager = "0.4.0"
2323
gdbstub = { version = "0.7.3", optional = true }
2424
gdbstub_arch = { version = "0.3.1", optional = true }
25-
kvm-bindings = { version = "0.11.0", features = ["fam-wrappers", "serde"] }
26-
kvm-ioctls = "0.20.0"
25+
kvm-bindings = { version = "0.11.1", features = ["fam-wrappers", "serde"] }
26+
kvm-ioctls = "0.21.0"
2727
libc = "0.2.170"
2828
linux-loader = "0.13.0"
2929
log = { version = "0.4.26", features = ["std", "serde"] }

src/vmm/src/vstate/vcpu/x86_64.rs

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ use std::fmt::Debug;
1010

1111
use kvm_bindings::{
1212
kvm_debugregs, kvm_lapic_state, kvm_mp_state, kvm_regs, kvm_sregs, kvm_vcpu_events, kvm_xcrs,
13-
kvm_xsave, CpuId, Msrs, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES,
13+
kvm_xsave, kvm_xsave2, CpuId, Msrs, Xsave, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES,
1414
};
1515
use kvm_ioctls::{VcpuExit, VcpuFd};
1616
use log::{error, warn};
1717
use serde::{Deserialize, Serialize};
18-
use vmm_sys_util::fam;
18+
use vmm_sys_util::fam::{self, FamStruct};
1919

2020
use crate::arch::x86_64::gen::msr_index::{MSR_IA32_TSC, MSR_IA32_TSC_DEADLINE};
2121
use crate::arch::x86_64::interrupts;
@@ -74,8 +74,10 @@ pub enum KvmVcpuError {
7474
VcpuGetVcpuEvents(kvm_ioctls::Error),
7575
/// Failed to get KVM vcpu xcrs: {0}
7676
VcpuGetXcrs(kvm_ioctls::Error),
77-
/// Failed to get KVM vcpu xsave: {0}
77+
/// Failed to get KVM vcpu xsave via KVM_GET_XSAVE: {0}
7878
VcpuGetXsave(kvm_ioctls::Error),
79+
/// Failed to get KVM vcpu xsave via KVM_GET_XSAVE2: {0}
80+
VcpuGetXsave2(kvm_ioctls::Error),
7981
/// Failed to get KVM vcpu cpuid: {0}
8082
VcpuGetCpuid(kvm_ioctls::Error),
8183
/// Failed to get KVM TSC frequency: {0}
@@ -147,6 +149,10 @@ pub struct KvmVcpu {
147149
/// The list of MSRs to include in a VM snapshot, in the same order as KVM returned them
148150
/// from KVM_GET_MSR_INDEX_LIST
149151
msrs_to_save: Vec<u32>,
152+
/// Byte size required to hold the dynamically-sized `kvm_xsave` struct.
153+
///
154+
/// `None` if `KVM_CAP_XSAVE2` not supported.
155+
xsave2_size: Option<usize>,
150156
}
151157

152158
/// Vcpu peripherals
@@ -176,6 +182,7 @@ impl KvmVcpu {
176182
fd: kvm_vcpu,
177183
peripherals: Default::default(),
178184
msrs_to_save: vm.msrs_to_save().to_vec(),
185+
xsave2_size: vm.xsave2_size(),
179186
})
180187
}
181188

@@ -496,7 +503,27 @@ impl KvmVcpu {
496503
.map_err(KvmVcpuError::VcpuGetMpState)?;
497504
let regs = self.fd.get_regs().map_err(KvmVcpuError::VcpuGetRegs)?;
498505
let sregs = self.fd.get_sregs().map_err(KvmVcpuError::VcpuGetSregs)?;
499-
let xsave = self.fd.get_xsave().map_err(KvmVcpuError::VcpuGetXsave)?;
506+
let xsave = match self.xsave2_size {
507+
Some(size) => {
508+
// SAFETY: The subtraction never underflows because
509+
// `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` returns at least 4096 bytes which equals
510+
// the size of `kvm_xsave`.
511+
// https://docs.kernel.org/virt/kvm/api.html#kvm-get-xsave2
512+
//
513+
// Note that the FAM entry type is `__u32` although `xsave2_size` is in bytes.
514+
let fam_len = (size - std::mem::size_of::<kvm_xsave>())
515+
.div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
516+
let mut xsave = Xsave::new(fam_len).map_err(KvmVcpuError::Fam)?;
517+
// SAFETY: Safe because `xsave` is allocated above with enough size to save XSTATE.
518+
unsafe { self.fd.get_xsave2(&mut xsave) }.map_err(KvmVcpuError::VcpuGetXsave2)?;
519+
xsave
520+
}
521+
// `KVM_CAP_XSAVE2` not supported
522+
None => Xsave::from(vec![kvm_xsave2 {
523+
len: 0,
524+
xsave: self.fd.get_xsave().map_err(KvmVcpuError::VcpuGetXsave)?,
525+
}]),
526+
};
500527
let xcrs = self.fd.get_xcrs().map_err(KvmVcpuError::VcpuGetXcrs)?;
501528
let debug_regs = self
502529
.fd
@@ -601,9 +628,17 @@ impl KvmVcpu {
601628
self.fd
602629
.set_sregs(&state.sregs)
603630
.map_err(KvmVcpuError::VcpuSetSregs)?;
604-
self.fd
605-
.set_xsave(&state.xsave)
606-
.map_err(KvmVcpuError::VcpuSetXsave)?;
631+
// SAFETY: Safe unless the snapshot is corrupted.
632+
unsafe {
633+
// kvm-ioctls' `set_xsave2()` can be called even on kernel versions not supporting
634+
// `KVM_CAP_XSAVE2`, because it internally calls `KVM_SET_XSAVE` API that was extended
635+
// by Linux kernel. Thus, `KVM_SET_XSAVE2` API does not exist as a KVM interface.
636+
// However, kvm-ioctls added `set_xsave2()` to allow users to pass `Xsave` instead of the
637+
// older `kvm_xsave`.
638+
self.fd
639+
.set_xsave2(&state.xsave)
640+
.map_err(KvmVcpuError::VcpuSetXsave)?;
641+
}
607642
self.fd
608643
.set_xcrs(&state.xcrs)
609644
.map_err(KvmVcpuError::VcpuSetXcrs)?;
@@ -684,7 +719,7 @@ pub struct VcpuState {
684719
/// Xcrs.
685720
pub xcrs: kvm_xcrs,
686721
/// Xsave.
687-
pub xsave: kvm_xsave,
722+
pub xsave: Xsave,
688723
/// Tsc khz.
689724
pub tsc_khz: Option<u32>,
690725
}
@@ -744,7 +779,7 @@ mod tests {
744779
sregs: Default::default(),
745780
vcpu_events: Default::default(),
746781
xcrs: Default::default(),
747-
xsave: Default::default(),
782+
xsave: Xsave::new(0).unwrap(),
748783
tsc_khz: Some(0),
749784
}
750785
}

src/vmm/src/vstate/vm/x86_64.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use kvm_bindings::{
77
kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, MsrList, KVM_CLOCK_TSC_STABLE,
88
KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY,
99
};
10-
use kvm_ioctls::VmFd;
10+
use kvm_ioctls::{Cap, VmFd};
1111
use serde::{Deserialize, Serialize};
1212

1313
use crate::arch::x86_64::msr::MsrError;
@@ -19,6 +19,8 @@ use crate::vstate::vm::VmError;
1919
#[cfg(target_arch = "x86_64")]
2020
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
2121
pub enum ArchVmError {
22+
/// Failed to check KVM capability (0): {1}
23+
CheckCapability(Cap, kvm_ioctls::Error),
2224
/// Set PIT2 error: {0}
2325
SetPit2(kvm_ioctls::Error),
2426
/// Set clock error: {0}
@@ -48,6 +50,10 @@ pub enum ArchVmError {
4850
pub struct ArchVm {
4951
pub(super) fd: VmFd,
5052
msrs_to_save: MsrList,
53+
/// Cache of the byte size required to hold the dynamically-sized `kvm_xsave` struct.
54+
///
55+
/// `None` if `KVM_CAP_XSAVE2` not supported.
56+
xsave2_size: Option<usize>,
5157
}
5258

5359
impl ArchVm {
@@ -57,10 +63,31 @@ impl ArchVm {
5763

5864
let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?;
5965

66+
// `KVM_CAP_XSAVE2` was introduced to support dynamically-sized XSTATE buffer in kernel
67+
// v5.17. `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` returns the required size in bytes if
68+
// supported; otherwise returns 0.
69+
// https://github.com/torvalds/linux/commit/be50b2065dfa3d88428fdfdc340d154d96bf6848
70+
// Cache the value in order not to call it at each vCPU creation.
71+
let xsave2_size = match fd.check_extension_int(Cap::Xsave2) {
72+
-1 => {
73+
return Err(VmError::Arch(ArchVmError::CheckCapability(
74+
Cap::Xsave2,
75+
vmm_sys_util::errno::Error::last(),
76+
)));
77+
}
78+
0 => None,
79+
// SAFETY: Safe because the possible negative value is only -1 and is handled above.
80+
ret => Some(ret.try_into().unwrap()),
81+
};
82+
6083
fd.set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS))
6184
.map_err(ArchVmError::SetTssAddress)?;
6285

63-
Ok(ArchVm { fd, msrs_to_save })
86+
Ok(ArchVm {
87+
fd,
88+
msrs_to_save,
89+
xsave2_size,
90+
})
6491
}
6592

6693
pub(super) fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> {
@@ -162,6 +189,11 @@ impl ArchVm {
162189
pub fn msrs_to_save(&self) -> &[u32] {
163190
self.msrs_to_save.as_slice()
164191
}
192+
193+
/// Gets the byte size of the `kvm_xsave` struct.
194+
pub fn xsave2_size(&self) -> Option<usize> {
195+
self.xsave2_size
196+
}
165197
}
166198

167199
#[derive(Default, Deserialize, Serialize)]

0 commit comments

Comments
 (0)