Skip to content

Commit 1261452

Browse files
committed
feat(vmm): Request permission for Intel AMX
Intel AMX (Advanced Matrix Extensions) was introduced in Intel Sapphire Rapids to accelerate deep learning and AI workloads. Since it requires a larger area to save its state, the TILEDATA feature is disabled by default. We request permission for it by default, since it can still be disabled via a CPU template. Without the request, kernels prior to v6.4 have a bug where KVM_GET_SUPPORTED_CPUID returns an inconsistent state with TILECFG enabled but TILEDATA disabled, causing a #GP fault in the guest on the xsetbv instruction. Signed-off-by: Takahiro Itazuri <[email protected]>
1 parent f552d4c commit 1261452

File tree

4 files changed

+146
-1
lines changed

4 files changed

+146
-1
lines changed

src/vmm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ license = "Apache-2.0"
99
bench = false
1010

1111
[dependencies]
12-
acpi_tables = { path = "../acpi-tables" }
12+
acpi_tables = { path = "../acpi-tables" }
1313
aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] }
1414
arrayvec = { version = "0.7.6", optional = true }
1515
aws-lc-rs = { version = "1.12.4", features = ["bindgen"] }

src/vmm/src/arch/x86_64/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ mod mptable;
1919
pub mod msr;
2020
/// Logic for configuring x86_64 registers.
2121
pub mod regs;
22+
/// Logic for configuring XSTATE features.
23+
pub mod xstate;
2224

2325
#[allow(missing_docs)]
2426
pub mod gen;

src/vmm/src/arch/x86_64/xstate.rs

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use vmm_sys_util::syscall::SyscallReturnCode;
5+
6+
use crate::arch::x86_64::gen::arch_prctl;
7+
8+
const INTEL_AMX_MASK: u64 = 1u64 << arch_prctl::ARCH_XCOMP_TILEDATA;
9+
10+
/// Errors associated with x86_64's dynamic XSAVE state features.
11+
#[derive(Debug, thiserror::Error, displaydoc::Display)]
12+
pub enum XstateError {
13+
/// Failed to get supported XSTATE features: {0}
14+
GetSupportedXstateFeatures(std::io::Error),
15+
/// Failed to request permission for XSTATE feature ({0}): {1}
16+
RequestXstateFeaturePermission(u32, std::io::Error),
17+
}
18+
19+
/// Request permission for all dynamic XSTATE features.
20+
///
21+
/// Some XSTATE features are not permitted by default, because they may require a larger area to
22+
/// save their states than the traditional 4096-byte area. Instead, the permission for them can be
23+
/// requested via arch_prctl().
24+
/// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst
25+
///
26+
/// Firecracker requests permission for them by default if available in order to retrieve the
27+
/// full supported feature set via KVM_GET_SUPPORTED_CPUID.
28+
/// https://docs.kernel.org/virt/kvm/api.html#kvm-get-supported-cpuid
29+
///
30+
/// Note that requested features can be masked by a CPU template.
31+
pub fn request_dynamic_xstate_features() -> Result<(), XstateError> {
32+
let supported_xfeatures =
33+
match get_supported_xfeatures().map_err(XstateError::GetSupportedXstateFeatures)? {
34+
Some(supported_xfeatures) => supported_xfeatures,
35+
// Exit early if dynamic XSTATE feature enabling is not supported on the kernel.
36+
None => return Ok(()),
37+
};
38+
39+
// Intel AMX's TILEDATA
40+
//
41+
// Unless requested, on kernels prior to v6.4, KVM_GET_SUPPORTED_CPUID returns an
42+
// inconsistent state where TILECFG is set but TILEDATA isn't. Such a half-enabled state
43+
// causes a guest crash during boot because the guest executes XSETBV with all XSAVE
43+
// feature bits enumerated in CPUID, and XSETBV requires both Intel AMX bits to be either
44+
// enabled or disabled together; otherwise it raises a general protection fault.
46+
if supported_xfeatures & INTEL_AMX_MASK == INTEL_AMX_MASK {
47+
request_xfeature_permission(arch_prctl::ARCH_XCOMP_TILEDATA).map_err(|err| {
48+
XstateError::RequestXstateFeaturePermission(arch_prctl::ARCH_XCOMP_TILEDATA, err)
49+
})?;
50+
}
51+
52+
Ok(())
53+
}
54+
55+
/// Get supported XSTATE features
56+
///
57+
/// Returns Ok(None) if dynamic XSTATE feature enabling is not supported.
58+
fn get_supported_xfeatures() -> Result<Option<u64>, std::io::Error> {
59+
let mut supported_xfeatures: u64 = 0;
60+
61+
// SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer.
62+
// https://man7.org/linux/man-pages/man2/arch_prctl.2.html
63+
match SyscallReturnCode(unsafe {
64+
libc::syscall(
65+
libc::SYS_arch_prctl,
66+
arch_prctl::ARCH_GET_XCOMP_SUPP,
67+
&mut supported_xfeatures as *mut libc::c_ulong,
68+
)
69+
})
70+
.into_empty_result()
71+
{
72+
Ok(()) => Ok(Some(supported_xfeatures)),
73+
// EINVAL is returned if the dynamic XSTATE feature enabling is not supported (e.g. kernel
74+
// version prior to v5.17).
75+
// https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
76+
Err(err) if err.raw_os_error() == Some(libc::EINVAL) => Ok(None),
77+
Err(err) => Err(err),
78+
}
79+
}
80+
81+
/// Request permission for a dynamic XSTATE feature.
82+
///
83+
/// This should be called after `get_supported_xfeatures()`, which also checks that dynamic XSTATE
84+
/// feature enabling is supported.
85+
fn request_xfeature_permission(xfeature: u32) -> Result<(), std::io::Error> {
86+
// SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` value.
87+
// https://man7.org/linux/man-pages/man2/arch_prctl.2.html
88+
SyscallReturnCode(unsafe {
89+
libc::syscall(
90+
libc::SYS_arch_prctl,
91+
arch_prctl::ARCH_REQ_XCOMP_GUEST_PERM as libc::c_ulong,
92+
xfeature as libc::c_ulong,
93+
)
94+
})
95+
.into_empty_result()
96+
}
97+
98+
#[cfg(test)]
99+
mod tests {
100+
use super::*;
101+
102+
// Get permitted XSTATE features.
103+
fn get_permitted_xstate_features() -> Result<u64, std::io::Error> {
104+
let mut permitted_xfeatures: u64 = 0;
105+
// SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer.
106+
match SyscallReturnCode(unsafe {
107+
libc::syscall(
108+
libc::SYS_arch_prctl,
109+
arch_prctl::ARCH_GET_XCOMP_GUEST_PERM,
110+
&mut permitted_xfeatures as *mut libc::c_ulong,
111+
)
112+
})
113+
.into_empty_result()
114+
{
115+
Ok(()) => Ok(permitted_xfeatures),
116+
Err(err) => Err(err),
117+
}
118+
}
119+
120+
#[test]
121+
fn test_request_xstate_feature_permission() {
122+
request_dynamic_xstate_features().unwrap();
123+
124+
let supported_xfeatures = match get_supported_xfeatures().unwrap() {
125+
Some(supported_xfeatures) => supported_xfeatures,
126+
// Nothing to test if dynamic XSTATE feature enabling is not supported on the kernel.
127+
None => return,
128+
};
129+
130+
// Check each dynamic feature is enabled. (currently only Intel AMX TILEDATA)
131+
if supported_xfeatures & INTEL_AMX_MASK == INTEL_AMX_MASK {
132+
let permitted_xfeatures = get_permitted_xstate_features().unwrap();
133+
assert_eq!(permitted_xfeatures & INTEL_AMX_MASK, INTEL_AMX_MASK);
134+
}
135+
}
136+
}

src/vmm/src/vstate/kvm.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use kvm_bindings::{CpuId, MsrList, KVM_MAX_CPUID_ENTRIES};
77
use kvm_ioctls::Kvm as KvmFd;
88
use serde::{Deserialize, Serialize};
99

10+
#[cfg(target_arch = "x86_64")]
11+
use crate::arch::x86_64::xstate::{request_dynamic_xstate_features, XstateError};
1012
use crate::cpu_config::templates::KvmCapability;
1113
use crate::vstate::memory::{GuestMemory, GuestMemoryMmap};
1214

@@ -27,6 +29,9 @@ pub enum KvmError {
2729
GetSupportedCpuId(kvm_ioctls::Error),
2830
/// The number of configured slots is bigger than the maximum reported by KVM
2931
NotEnoughMemorySlots,
32+
#[cfg(target_arch = "x86_64")]
33+
/// Failed to request permission for dynamic XSTATE features {0}
34+
XstateFeatures(XstateError),
3035
}
3136

3237
/// Struct with kvm fd and kvm associated parameters.
@@ -73,6 +78,8 @@ impl Kvm {
7378

7479
#[cfg(target_arch = "x86_64")]
7580
{
81+
request_dynamic_xstate_features().map_err(KvmError::XstateFeatures)?;
82+
7683
let supported_cpuid = kvm_fd
7784
.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES)
7885
.map_err(KvmError::GetSupportedCpuId)?;

0 commit comments

Comments
 (0)