Skip to content

Commit d468920

Browse files
committed
feat: Support Intel AMX
Intel AMX (Advanced Matrix Extensions) was introduced in Intel Sapphire Rapids and accelerates deep learning and artificial intelligence workloads. Since it requires a larger area to save its state, the TILEDATA feature is disabled by default. We enable it by default because it can still be disabled via CPU template. If it were left disabled, kernels prior to v6.4 have a bug where KVM_GET_SUPPORTED_CPUID returns an inconsistent state of TILECFG enabled but TILEDATA disabled, causing a #GP fault in the guest on the xsetbv instruction. Signed-off-by: Takahiro Itazuri <[email protected]>
1 parent d6a52b8 commit d468920

File tree

2 files changed

+176
-1
lines changed

2 files changed

+176
-1
lines changed

src/vmm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ license = "Apache-2.0"
99
bench = false
1010

1111
[dependencies]
12-
acpi_tables = { path = "../acpi-tables" }
12+
acpi_tables = { path = "../acpi-tables" }
1313
aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] }
1414
arrayvec = { version = "0.7.6", optional = true }
1515
aws-lc-rs = { version = "1.12.4", features = ["bindgen"] }

src/vmm/src/vstate/kvm.rs

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@ use kvm_bindings::KVM_API_VERSION;
66
use kvm_bindings::{CpuId, MsrList, KVM_MAX_CPUID_ENTRIES};
77
use kvm_ioctls::Kvm as KvmFd;
88
use serde::{Deserialize, Serialize};
9+
#[cfg(target_arch = "x86_64")]
10+
use vmm_sys_util::syscall::SyscallReturnCode;
911

12+
#[cfg(target_arch = "x86_64")]
13+
use crate::arch::x86_64::gen::arch_prctl;
1014
use crate::cpu_config::templates::KvmCapability;
1115
use crate::vstate::memory::{GuestMemory, GuestMemoryMmap};
1216

@@ -25,8 +29,14 @@ pub enum KvmError {
2529
#[cfg(target_arch = "x86_64")]
2630
/// Failed to get supported cpuid: {0}
2731
GetSupportedCpuId(kvm_ioctls::Error),
32+
#[cfg(target_arch = "x86_64")]
33+
/// Failed to get supported XSTATE features: {0}
34+
GetSupportedXstateFeatures(std::io::Error),
2835
/// The number of configured slots is bigger than the maximum reported by KVM
2936
NotEnoughMemorySlots,
37+
#[cfg(target_arch = "x86_64")]
38+
/// Failed to enable XSTATE features ({0:#b}): {1}
39+
RequestXstateFeatures(u32, std::io::Error),
3040
}
3141

3242
/// Struct with kvm fd and kvm associated parameters.
@@ -73,6 +83,8 @@ impl Kvm {
7383

7484
#[cfg(target_arch = "x86_64")]
7585
{
86+
Self::enable_intel_amx()?;
87+
7688
let supported_cpuid = kvm_fd
7789
.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES)
7890
.map_err(KvmError::GetSupportedCpuId)?;
@@ -86,6 +98,74 @@ impl Kvm {
8698
}
8799
}
88100

101+
#[cfg(target_arch = "x86_64")]
102+
// XSTATE feature mask for Intel AMX.
103+
const INTEL_AMX_XCOMP_MASK: libc::c_ulong =
104+
(1u64 << arch_prctl::ARCH_XCOMP_TILECFG) | (1u64 << arch_prctl::ARCH_XCOMP_TILEDATA);
105+
106+
/// Enable Intel AMX if available.
107+
///
108+
/// Intel AMX (Advanced Matrix Extensions) is an instruction set for AI workloads that was
109+
/// introduced in Intel Sapphire Rapids (*7i.metal). Since it requires larger area to save the
110+
/// state, it is disabled by default.
111+
/// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst
112+
///
113+
/// We enable it by default, but it can be disabled by CPU template; otherwise,
114+
/// KVM_GET_SUPPORTED_CPUID returns an inconsistent state where TILECFG is enabled but TILEDATA
115+
/// is disabled, causing guest's #GP fault on xsetbv due to the lack of sanity check.
116+
/// https://lore.kernel.org/all/[email protected]/
117+
///
118+
/// Dynamically-enabled feature bits need to be requested with arch_prctl() before calling
119+
/// KVM_GET_SUPPORTED_CPUID. Feature bits that have not been requested are excluded from the
120+
/// result of KVM_GET_SUPPORTED_CPUID.
121+
/// https://docs.kernel.org/virt/kvm/api.html
122+
///
123+
/// Note that no memory allocation to save Intel AMX state happens here immediately.
124+
#[cfg(target_arch = "x86_64")]
125+
fn enable_intel_amx() -> Result<(), KvmError> {
126+
// Get the supported xstate features.
127+
let mut supported_xfeatures: libc::c_ulong = 0;
128+
// SAFETY: Safe because the second input (`op`) might not be valid for unsupported kernels
129+
// but EINVAL is handled later, and the third input (`addr`) is a valid c_ulong pointer.
130+
// https://man7.org/linux/man-pages/man2/arch_prctl.2.html
131+
SyscallReturnCode(unsafe {
132+
libc::syscall(
133+
libc::SYS_arch_prctl,
134+
arch_prctl::ARCH_GET_XCOMP_SUPP,
135+
&mut supported_xfeatures as *mut libc::c_ulong,
136+
)
137+
})
138+
.into_empty_result()
139+
.or_else(|err| {
140+
// EINVAL is returned if ARCH_GET_XCOMP_SUPP is not supported (e.g. kernel versions
141+
// prior to v5.17).
142+
// https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
143+
if err.raw_os_error() == Some(libc::EINVAL) {
144+
Ok(())
145+
} else {
146+
Err(err)
147+
}
148+
})
149+
.map_err(KvmError::GetSupportedXstateFeatures)?;
150+
151+
// Enable Intel AMX if supported.
152+
if (supported_xfeatures & Self::INTEL_AMX_XCOMP_MASK) == Self::INTEL_AMX_XCOMP_MASK {
153+
// SAFETY: Safe because ARCH_REQ_XCOMP_GUEST_PERM is supported if ARCH_GET_XCOMP_SUPP is
154+
// supported and it has been confirmed that ARCH_XCOMP_TILEDATA is supported.
155+
SyscallReturnCode(unsafe {
156+
libc::syscall(
157+
libc::SYS_arch_prctl,
158+
arch_prctl::ARCH_REQ_XCOMP_GUEST_PERM,
159+
arch_prctl::ARCH_XCOMP_TILEDATA,
160+
)
161+
})
162+
.into_empty_result()
163+
.map_err(|err| KvmError::RequestXstateFeatures(arch_prctl::ARCH_XCOMP_TILEDATA, err))?;
164+
}
165+
166+
Ok(())
167+
}
168+
89169
/// Msrs needed to be saved on snapshot creation.
90170
#[cfg(target_arch = "x86_64")]
91171
pub fn msrs_to_save(&self) -> Result<MsrList, crate::arch::x86_64::msr::MsrError> {
@@ -215,4 +295,99 @@ pub(crate) mod tests {
215295
.iter()
216296
.any(|c| *c == kvm_bindings::KVM_CAP_IOEVENTFD));
217297
}
298+
299+
#[cfg(target_arch = "x86_64")]
300+
mod x86_64 {
301+
use super::*;
302+
use crate::arch::x86_64::cpu_model::CpuModel;
303+
304+
#[derive(PartialEq, PartialOrd)]
305+
struct KernelVersion(u32, u32);
306+
307+
impl KernelVersion {
308+
fn current() -> Self {
309+
let version_str = std::fs::read_to_string("/proc/sys/kernel/osrelease").unwrap();
310+
let mut parts = version_str.trim().split('.');
311+
312+
let major = parts.next().unwrap().parse::<u32>().unwrap();
313+
let minor = parts.next().unwrap().parse::<u32>().unwrap();
314+
315+
KernelVersion(major, minor)
316+
}
317+
}
318+
319+
#[derive(PartialEq)]
320+
enum Vendor {
321+
Intel,
322+
Amd,
323+
}
324+
325+
impl Vendor {
326+
fn new() -> Self {
327+
let vendor_id = Self::get_vendor_id_str();
328+
match vendor_id.as_str() {
329+
"GenuineIntel" => Vendor::Intel,
330+
"AuthenticAMD" => Vendor::Amd,
331+
_ => panic!("Unknown vendor_id: {}", vendor_id),
332+
}
333+
}
334+
335+
fn get_vendor_id_str() -> String {
336+
let cpuinfo = std::fs::read_to_string("/proc/cpuinfo").unwrap();
337+
338+
for line in cpuinfo.lines() {
339+
if line.starts_with("vendor_id") {
340+
return line
341+
.split(':')
342+
.nth(1)
343+
.map(|s| s.trim().to_string())
344+
.unwrap();
345+
}
346+
}
347+
panic!("`vendor_id` not found in /proc/cpuinfo");
348+
}
349+
}
350+
351+
#[cfg(target_arch = "x86_64")]
352+
#[test]
353+
fn test_enable_intel_amx() {
354+
Kvm::enable_intel_amx().unwrap();
355+
356+
// ARCH_{REQ,GET}_XCOMP_GUEST_PERM were added in kernel v5.17.
357+
// https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
358+
let supported_version = KernelVersion(5, 17);
359+
let current_version = KernelVersion::current();
360+
361+
if current_version >= supported_version {
362+
let mut permitted_xfeatures: libc::c_ulong = 0;
363+
// SAFETY: Safe because the second input (`op`) should be valid on kernel v5.17+,
364+
// and the third input (`addr`) is a valid `c_ulong` pointer.
365+
SyscallReturnCode(unsafe {
366+
libc::syscall(
367+
libc::SYS_arch_prctl,
368+
arch_prctl::ARCH_GET_XCOMP_GUEST_PERM,
369+
&mut permitted_xfeatures as *mut libc::c_ulong,
370+
)
371+
})
372+
.into_empty_result()
373+
.unwrap();
374+
375+
// Intel AMX is available only on Intel processors now.
376+
let vendor = Vendor::new();
377+
378+
// Intel AMX was introduced in Intel Sapphire Rapids (CPUID.01H:EAX = 0x000806f8).
379+
let supported_cpu = CpuModel::from(&0x000806f8);
380+
let current_cpu = CpuModel::get_cpu_model();
381+
382+
if current_cpu >= supported_cpu && vendor == Vendor::Intel {
383+
assert_eq!(
384+
permitted_xfeatures & Kvm::INTEL_AMX_XCOMP_MASK,
385+
Kvm::INTEL_AMX_XCOMP_MASK
386+
);
387+
} else {
388+
assert_eq!(permitted_xfeatures & Kvm::INTEL_AMX_XCOMP_MASK, 0);
389+
}
390+
}
391+
}
392+
}
218393
}

0 commit comments

Comments
 (0)