Skip to content

Commit 6d651fd

Browse files
olivereandersonphip1611
authored andcommitted
hypervisor: Fix live migration when AMX is configured
AMX requires dynamically enabling certain large state components which leads to an increase in the size of kvm_xsave. This was not taken into account by Cloud hypervisor until now. We solve this by refactoring `XSaveState` to directly wrap `kvm::Xsave` and always ensure (via a OnceLocked static variable) that all operations on the wrapped xsave state obtain an instance of the right size. Signed-Off-by: Oliver Anderson <[email protected]> On-behalf-of: SAP [email protected]
1 parent 3c16e1e commit 6d651fd

File tree

5 files changed

+67
-45
lines changed

5 files changed

+67
-45
lines changed

hypervisor/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ cfg-if = { workspace = true }
2121
concat-idents = "1.1.5"
2222
igvm = { workspace = true, optional = true }
2323
igvm_defs = { workspace = true, optional = true }
24-
kvm-bindings = { workspace = true, optional = true, features = ["serde"] }
24+
kvm-bindings = { workspace = true, optional = true, features = [
25+
"fam-wrappers",
26+
"serde",
27+
] }
2528
kvm-ioctls = { workspace = true, optional = true }
2629
libc = { workspace = true }
2730
log = { workspace = true }

hypervisor/src/arch/x86/mod.rs

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
//
1313

1414
use core::fmt;
15-
16-
use crate::{CpuVendor, Hypervisor};
15+
#[cfg(feature = "kvm")]
16+
use std::sync::OnceLock;
1717

1818
use thiserror::Error;
1919

20+
use crate::{CpuVendor, Hypervisor};
21+
2022
#[cfg(all(feature = "mshv_emulator", target_arch = "x86_64"))]
2123
pub mod emulator;
2224
pub mod gdt;
@@ -330,19 +332,43 @@ pub enum AmxGuestSupportError {
330332
AmxGuestTileRequest { errno: i64 },
331333
}
332334

333-
#[serde_with::serde_as]
335+
/// The length of the XSAVE flexible array member (FAM).
336+
/// This length increases when arch_prctl is utilized to dynamically add state components.
337+
///
338+
/// IMPORTANT: This static should only be updated via methods on [`XsaveState`].
339+
#[cfg(feature = "kvm")]
340+
static XSAVE_FAM_LENGTH: OnceLock<usize> = OnceLock::new();
341+
334342
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
335-
pub struct XsaveState {
336-
#[serde_as(as = "[_; 1024usize]")]
337-
pub region: [u32; 1024usize],
338-
}
343+
pub struct XsaveState(#[cfg(feature = "kvm")] pub(crate) kvm_bindings::Xsave);
339344

340345
impl XsaveState {
341346
const ARCH_GET_XCOMP_SUPP: usize = 0x1021;
342347
const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025;
343348
const ARCH_XCOMP_TILECFG: usize = 17;
344349
const ARCH_XCOMP_TILEDATA: usize = 18;
345350

351+
/// Construct an instance via the given initializer.
352+
///
353+
/// As long as dynamically enabled state components have only been enabled
354+
/// through static methods on this struct it is guaranteed that the
355+
/// initialization routine is given an Xsave struct of the expected size.
356+
#[cfg(feature = "kvm")]
357+
pub(crate) fn with_initializer<F, E>(
358+
mut init: F,
359+
) -> Result<Self, Box<dyn std::error::Error + Send + Sync + 'static>>
360+
where
361+
F: FnMut(&mut kvm_bindings::Xsave) -> Result<(), E>,
362+
E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
363+
{
364+
let fam_length = XSAVE_FAM_LENGTH.get().unwrap_or(&0);
365+
366+
let mut xsave = kvm_bindings::Xsave::new(*fam_length)?;
367+
368+
init(&mut xsave).map_err(Into::into)?;
369+
Ok(Self(xsave))
370+
}
371+
346372
/// This function enables the AMX related TILECFG and TILEDATA state components for guests.
347373
///
348374
/// # Background
@@ -353,9 +379,28 @@ impl XsaveState {
353379
hypervisor: &dyn Hypervisor,
354380
) -> Result<(), AmxGuestSupportError> {
355381
Self::amx_supported(hypervisor)?;
356-
357382
Self::request_guest_amx_support()?;
358383

384+
// If we are using the KVM hypervisor we meed to query for the new xsave2 size and update
385+
// `XSAVE_FAM_LENGTH` accordingly.
386+
#[cfg(feature = "kvm")]
387+
{
388+
// Obtain the number of bytes the kvm_xsave struct requires.
389+
// This number is documented to always be at least 4096 bytes, but
390+
let size = hypervisor.check_extension_int(kvm_ioctls::Cap::Xsave2);
391+
// Reality check: We should at least have this number of bytes and probably more as we have enabled
392+
// AMX tiles. If this is not the case, it is probably best to panic.
393+
assert!(size >= 4096);
394+
let fam_length = {
395+
// Computation is documented in `[kvm_bindings::kvm_xsave2::len]`
396+
((size as usize) - size_of::<kvm_bindings::kvm_xsave>())
397+
.div_ceil(size_of::<kvm_bindings::__u32>())
398+
};
399+
XSAVE_FAM_LENGTH
400+
.set(fam_length)
401+
.expect("This should only be set once");
402+
}
403+
359404
Ok(())
360405
}
361406

@@ -420,10 +465,3 @@ impl XsaveState {
420465
}
421466
}
422467
}
423-
424-
impl Default for XsaveState {
425-
fn default() -> Self {
426-
// SAFETY: this is plain old data structure
427-
unsafe { ::std::mem::zeroed() }
428-
}
429-
}

hypervisor/src/kvm/mod.rs

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2856,26 +2856,23 @@ impl KvmVcpu {
28562856
/// X86 specific call that returns the vcpu's current "xsave struct".
28572857
///
28582858
fn get_xsave(&self) -> cpu::Result<XsaveState> {
2859-
Ok(self
2860-
.fd
2861-
.get_xsave()
2862-
.map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
2863-
.into())
2859+
XsaveState::with_initializer(|state|
2860+
// SAFETY: Any configured dynamically enabled state components are always enabled via
2861+
// static methods on `XsaveState` hence we know that `state` has the expected size.
2862+
unsafe { self.fd.get_xsave2(state) })
2863+
.map_err(|e| cpu::HypervisorCpuError::GetXsaveState(anyhow::Error::from_boxed(e)))
28642864
}
28652865

28662866
#[cfg(target_arch = "x86_64")]
28672867
///
28682868
/// X86 specific call that sets the vcpu's current "xsave struct".
28692869
///
28702870
fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
2871-
let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
2872-
// SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct
2873-
// when calling the kvm-ioctl library function.
2874-
unsafe {
2875-
self.fd
2876-
.set_xsave(&xsave)
2877-
.map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
2878-
}
2871+
// SAFETY: Any configured dynamically enabled state components are always enabled via
2872+
// static methods on `XsaveState` hence we know that the wrapped instance has the
2873+
// expected size.
2874+
unsafe { self.fd.set_xsave2(&xsave.0) }
2875+
.map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
28792876
}
28802877

28812878
#[cfg(target_arch = "x86_64")]

hypervisor/src/kvm/x86_64/mod.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,18 +291,3 @@ impl From<MsrEntry> for kvm_msr_entry {
291291
}
292292
}
293293
}
294-
295-
impl From<kvm_xsave> for XsaveState {
296-
fn from(s: kvm_xsave) -> Self {
297-
Self { region: s.region }
298-
}
299-
}
300-
301-
impl From<XsaveState> for kvm_xsave {
302-
fn from(s: XsaveState) -> Self {
303-
Self {
304-
region: s.region,
305-
extra: Default::default(),
306-
}
307-
}
308-
}

hypervisor/src/mshv/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,6 @@ impl hypervisor::Hypervisor for MshvHypervisor {
398398
}
399399
}
400400

401-
402401
#[cfg(feature = "kvm")]
403402
fn check_extension_int(&self, _capability: kvm_ioctls::Cap) -> i32 {
404403
unimplemented!()

0 commit comments

Comments
 (0)