Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/vmm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ license = "Apache-2.0"
bench = false

[dependencies]
acpi_tables = { path = "../acpi-tables" }
acpi_tables = { path = "../acpi-tables" }
aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] }
arrayvec = { version = "0.7.6", optional = true }
aws-lc-rs = { version = "1.12.4", features = ["bindgen"] }
Expand Down
43 changes: 43 additions & 0 deletions src/vmm/src/arch/x86_64/gen/arch_prctl.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// automatically generated by tools/bindgen.sh

#![allow(
non_camel_case_types,
non_upper_case_globals,
dead_code,
non_snake_case,
clippy::ptr_as_ptr,
clippy::undocumented_unsafe_blocks,
missing_debug_implementations,
clippy::tests_outside_test_module
)]

// Constants for the x86 `arch_prctl()` interface, generated by tools/bindgen.sh from
// `arch/x86/include/uapi/asm/prctl.h` (every variable matching `ARCH_.*`).
// Regenerate rather than editing the values by hand.
pub const ARCH_SET_GS: u32 = 4097;
pub const ARCH_SET_FS: u32 = 4098;
pub const ARCH_GET_FS: u32 = 4099;
pub const ARCH_GET_GS: u32 = 4100;
pub const ARCH_GET_CPUID: u32 = 4113;
pub const ARCH_SET_CPUID: u32 = 4114;
// XSTATE component operations. Within this crate, ARCH_GET_XCOMP_SUPP is used to read the
// supported feature mask, ARCH_REQ_XCOMP_GUEST_PERM to request a feature for guests, and
// ARCH_GET_XCOMP_GUEST_PERM to read back the mask permitted for guests.
pub const ARCH_GET_XCOMP_SUPP: u32 = 4129;
pub const ARCH_GET_XCOMP_PERM: u32 = 4130;
pub const ARCH_REQ_XCOMP_PERM: u32 = 4131;
pub const ARCH_GET_XCOMP_GUEST_PERM: u32 = 4132;
pub const ARCH_REQ_XCOMP_GUEST_PERM: u32 = 4133;
// XSTATE feature numbers. These are bit positions, not masks: callers build a mask with
// `1 << ARCH_XCOMP_*`.
pub const ARCH_XCOMP_TILECFG: u32 = 17;
pub const ARCH_XCOMP_TILEDATA: u32 = 18;
pub const ARCH_MAP_VDSO_X32: u32 = 8193;
pub const ARCH_MAP_VDSO_32: u32 = 8194;
pub const ARCH_MAP_VDSO_64: u32 = 8195;
pub const ARCH_GET_UNTAG_MASK: u32 = 16385;
pub const ARCH_ENABLE_TAGGED_ADDR: u32 = 16386;
pub const ARCH_GET_MAX_TAG_BITS: u32 = 16387;
pub const ARCH_FORCE_TAGGED_SVA: u32 = 16388;
pub const ARCH_SHSTK_ENABLE: u32 = 20481;
pub const ARCH_SHSTK_DISABLE: u32 = 20482;
pub const ARCH_SHSTK_LOCK: u32 = 20483;
pub const ARCH_SHSTK_UNLOCK: u32 = 20484;
pub const ARCH_SHSTK_STATUS: u32 = 20485;
pub const ARCH_SHSTK_SHSTK: u32 = 1;
pub const ARCH_SHSTK_WRSS: u32 = 2;
2 changes: 1 addition & 1 deletion src/vmm/src/arch/x86_64/gen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

pub mod arch_prctl;
pub mod hyperv;
pub mod hyperv_tlfs;
pub mod mpspec;

pub mod msr_index;
pub mod perf_event;
177 changes: 176 additions & 1 deletion src/vmm/src/vstate/kvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,18 @@
use kvm_bindings::{CpuId, MsrList, KVM_MAX_CPUID_ENTRIES};
use kvm_ioctls::Kvm as KvmFd;
use serde::{Deserialize, Serialize};
#[cfg(target_arch = "x86_64")]
use vmm_sys_util::syscall::SyscallReturnCode;

#[cfg(target_arch = "x86_64")]
use crate::arch::x86_64::gen::arch_prctl;
use crate::cpu_config::templates::KvmCapability;
use crate::vstate::memory::{GuestMemory, GuestMemoryMmap};

/// Errors associated with the wrappers over KVM ioctls.
/// Needs `rustfmt::skip` to make multiline comments work
#[rustfmt::skip]
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
#[derive(Debug, thiserror::Error, displaydoc::Display)]

Check warning on line 20 in src/vmm/src/vstate/kvm.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/vstate/kvm.rs#L20

Added line #L20 was not covered by tests
pub enum KvmError {
/// The host kernel reports an invalid KVM API version: {0}
ApiVersion(i32),
Expand All @@ -25,8 +29,14 @@
#[cfg(target_arch = "x86_64")]
/// Failed to get supported cpuid: {0}
GetSupportedCpuId(kvm_ioctls::Error),
#[cfg(target_arch = "x86_64")]
/// Failed to get supported XSTATE features: {0}
GetSupportedXstateFeatures(std::io::Error),
/// The number of configured slots is bigger than the maximum reported by KVM
NotEnoughMemorySlots,
#[cfg(target_arch = "x86_64")]
/// Failed to enable XSTATE features ({0:#b}): {1}
RequestXstateFeatures(u32, std::io::Error),
}

/// Struct with kvm fd and kvm associated parameters.
Expand Down Expand Up @@ -73,6 +83,8 @@

#[cfg(target_arch = "x86_64")]
{
Self::enable_intel_amx()?;

let supported_cpuid = kvm_fd
.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES)
.map_err(KvmError::GetSupportedCpuId)?;
Expand All @@ -86,6 +98,74 @@
}
}

/// XSTATE feature mask for Intel AMX: the `ARCH_XCOMP_TILECFG` and `ARCH_XCOMP_TILEDATA`
/// feature bits combined.
#[cfg(target_arch = "x86_64")]
const INTEL_AMX_XCOMP_MASK: libc::c_ulong =
(1u64 << arch_prctl::ARCH_XCOMP_TILECFG) | (1u64 << arch_prctl::ARCH_XCOMP_TILEDATA);

/// Enable Intel AMX if available.
///
/// Intel AMX (Advanced Matrix Extensions) is an instruction set for AI workloads that was
/// introduced in Intel Sapphire Rapids (*7i.metal). Since it requires a larger area to save
/// the state, it is disabled by default.
/// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst
///
/// We enable it by default, but it can be disabled by a CPU template; otherwise,
/// KVM_GET_SUPPORTED_CPUID returns an inconsistent state where TILECFG is enabled but TILEDATA
/// is disabled, causing a #GP fault in the guest on xsetbv due to the lack of a sanity check.
/// https://lore.kernel.org/all/[email protected]/
///
/// Dynamically-enabled feature bits need to be requested with arch_prctl() before calling
/// KVM_GET_SUPPORTED_CPUID. Feature bits that have not been requested are excluded from the
/// result of KVM_GET_SUPPORTED_CPUID.
/// https://docs.kernel.org/virt/kvm/api.html
///
/// Note that no memory allocation to save Intel AMX state happens here immediately.
///
/// # Errors
///
/// * [`KvmError::GetSupportedXstateFeatures`] if `ARCH_GET_XCOMP_SUPP` fails with anything
///   other than `EINVAL` (`EINVAL` is treated as "no dynamic features supported").
/// * [`KvmError::RequestXstateFeatures`] if requesting guest permission for TILEDATA fails.
#[cfg(target_arch = "x86_64")]
fn enable_intel_amx() -> Result<(), KvmError> {
    // Get the supported xstate features. Stays 0 if the kernel cannot report them.
    let mut supported_xfeatures: libc::c_ulong = 0;
    // SAFETY: Safe because the second input (`op`) might not be valid for unsupported kernels
    // but EINVAL is handled later, and the third input (`addr`) is a valid c_ulong pointer.
    // https://man7.org/linux/man-pages/man2/arch_prctl.2.html
    SyscallReturnCode(unsafe {
        libc::syscall(
            libc::SYS_arch_prctl,
            // `syscall()` is variadic and forwards full-width registers to the kernel, so
            // widen the 32-bit constant explicitly instead of relying on how a narrower
            // variadic argument happens to be extended.
            libc::c_ulong::from(arch_prctl::ARCH_GET_XCOMP_SUPP),
            &mut supported_xfeatures as *mut libc::c_ulong,
        )
    })
    .into_empty_result()
    .or_else(|err| {
        // EINVAL is returned if ARCH_GET_XCOMP_SUPP is not supported (e.g. kernel versions
        // prior to v5.17).
        // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
        if err.raw_os_error() == Some(libc::EINVAL) {
            Ok(())
        } else {
            Err(err)
        }
    })
    .map_err(KvmError::GetSupportedXstateFeatures)?;

    // Enable Intel AMX only if both TILECFG and TILEDATA are supported.
    if (supported_xfeatures & Self::INTEL_AMX_XCOMP_MASK) == Self::INTEL_AMX_XCOMP_MASK {
        // SAFETY: Safe because ARCH_REQ_XCOMP_GUEST_PERM is supported if ARCH_GET_XCOMP_SUPP is
        // supported and it has been confirmed that ARCH_XCOMP_TILEDATA is supported.
        SyscallReturnCode(unsafe {
            libc::syscall(
                libc::SYS_arch_prctl,
                libc::c_ulong::from(arch_prctl::ARCH_REQ_XCOMP_GUEST_PERM),
                // The kernel reads the feature number as an `unsigned long`; pass it at
                // that width so the upper bits are well defined.
                libc::c_ulong::from(arch_prctl::ARCH_XCOMP_TILEDATA),
            )
        })
        .into_empty_result()
        .map_err(|err| KvmError::RequestXstateFeatures(arch_prctl::ARCH_XCOMP_TILEDATA, err))?;
    }

    Ok(())
}

/// Msrs needed to be saved on snapshot creation.
#[cfg(target_arch = "x86_64")]
pub fn msrs_to_save(&self) -> Result<MsrList, crate::arch::x86_64::msr::MsrError> {
Expand Down Expand Up @@ -215,4 +295,99 @@
.iter()
.any(|c| *c == kvm_bindings::KVM_CAP_IOEVENTFD));
}

#[cfg(target_arch = "x86_64")]
mod x86_64 {
use super::*;
use crate::arch::x86_64::cpu_model::CpuModel;

/// Kernel release as a `(major, minor)` pair.
///
/// The derived ordering compares the major number first and the minor number second.
#[derive(PartialEq, PartialOrd)]
struct KernelVersion(u32, u32);

impl KernelVersion {
    /// Reads the running kernel's release from `/proc/sys/kernel/osrelease`.
    ///
    /// Panics if the file cannot be read, or if either of the first two dot-separated
    /// fields is missing or does not parse as a `u32`.
    fn current() -> Self {
        let release = std::fs::read_to_string("/proc/sys/kernel/osrelease").unwrap();

        // Lazily parse the dot-separated fields; only the first two are ever consumed.
        let mut fields = release
            .trim()
            .split('.')
            .map(|field| field.parse::<u32>().unwrap());

        let major = fields.next().unwrap();
        let minor = fields.next().unwrap();
        Self(major, minor)
    }
}

/// CPU vendors distinguished by the tests in this module.
#[derive(PartialEq)]
enum Vendor {
    Intel,
    Amd,
}

impl Vendor {
    /// Detects the host CPU vendor from `/proc/cpuinfo`.
    ///
    /// Panics if the vendor string is neither `GenuineIntel` nor `AuthenticAMD`.
    fn new() -> Self {
        let vendor_id = Self::get_vendor_id_str();
        if vendor_id == "GenuineIntel" {
            Self::Intel
        } else if vendor_id == "AuthenticAMD" {
            Self::Amd
        } else {
            panic!("Unknown vendor_id: {}", vendor_id)
        }
    }

    /// Returns the trimmed value of the first `vendor_id` line in `/proc/cpuinfo`.
    ///
    /// Panics if the file cannot be read, if no line starts with `vendor_id`, or if that
    /// line has no `:` separator.
    fn get_vendor_id_str() -> String {
        let cpuinfo = std::fs::read_to_string("/proc/cpuinfo").unwrap();

        let first_match = cpuinfo
            .lines()
            .find(|line| line.starts_with("vendor_id"));

        match first_match {
            Some(line) => {
                let value = line.split(':').nth(1).unwrap();
                value.trim().to_string()
            }
            None => panic!("`vendor_id` not found in /proc/cpuinfo"),
        }
    }
}

/// Checks that `Kvm::enable_intel_amx()` succeeds and, on kernels that can report it, that
/// the guest XSTATE permission mask afterwards matches what is expected for the host CPU.
#[cfg(target_arch = "x86_64")]
#[test]
fn test_enable_intel_amx() {
    Kvm::enable_intel_amx().unwrap();

    // ARCH_{REQ,GET}_XCOMP_GUEST_PERM were added in kernel v5.17, so there is nothing to
    // query on older kernels.
    // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
    let minimum_kernel = KernelVersion(5, 17);
    let running_kernel = KernelVersion::current();
    if running_kernel < minimum_kernel {
        return;
    }

    let mut guest_permitted: libc::c_ulong = 0;
    // SAFETY: Safe because the second input (`op`) should be valid on kernel v5.17+,
    // and the third input (`addr`) is a valid `c_ulong` pointer.
    let ret = unsafe {
        libc::syscall(
            libc::SYS_arch_prctl,
            arch_prctl::ARCH_GET_XCOMP_GUEST_PERM,
            &mut guest_permitted as *mut libc::c_ulong,
        )
    };
    SyscallReturnCode(ret).into_empty_result().unwrap();

    // Intel AMX is available only on Intel processors now.
    let vendor = Vendor::new();

    // Intel AMX is introduced in Intel Sapphire Rapids (CPUID.01H:EAX = 0x000806f8).
    let first_amx_cpu = CpuModel::from(&0x000806f8);
    let host_cpu = CpuModel::get_cpu_model();

    // Both AMX bits must be permitted on an AMX-capable Intel host, and neither elsewhere.
    let expected_amx_bits = if host_cpu >= first_amx_cpu && vendor == Vendor::Intel {
        Kvm::INTEL_AMX_XCOMP_MASK
    } else {
        0
    };
    assert_eq!(
        guest_permitted & Kvm::INTEL_AMX_XCOMP_MASK,
        expected_amx_bits
    );
}
}
}
4 changes: 4 additions & 0 deletions tests/framework/utils_cpuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class CpuModel(str, Enum):
INTEL_SKYLAKE = "INTEL_SKYLAKE"
INTEL_CASCADELAKE = "INTEL_CASCADELAKE"
INTEL_ICELAKE = "INTEL_ICELAKE"
INTEL_SAPPHIRE_RAPIDS = "INTEL_SAPPHIRE_RAPIDS"


CPU_DICT = {
Expand All @@ -40,6 +41,7 @@ class CpuModel(str, Enum):
"Intel(R) Xeon(R) Platinum 8124M CPU": "INTEL_SKYLAKE",
"Intel(R) Xeon(R) Platinum 8259CL CPU": "INTEL_CASCADELAKE",
"Intel(R) Xeon(R) Platinum 8375C CPU": "INTEL_ICELAKE",
"Intel(R) Xeon(R) Platinum 8488C": "INTEL_SAPPHIRE_RAPIDS",
},
CpuVendor.AMD: {"AMD EPYC 7R13": "AMD_MILAN", "AMD EPYC 9R14": "AMD_GENOA"},
CpuVendor.ARM: {
Expand Down Expand Up @@ -83,6 +85,8 @@ def get_cpu_codename(default="Unknown"):
result = re.match(r"^(.*) @.*$", cpu_model)
if result:
return CPU_DICT[CpuVendor.INTEL].get(result.group(1), default)
# Some Intel CPUs (e.g. Intel Sapphire Rapids) don't include "@ <frequency>".
return CPU_DICT[CpuVendor.INTEL].get(cpu_model, default)
if vendor == CpuVendor.AMD:
result = re.match(r"^(.*) [0-9]*-Core Processor$", cpu_model)
if result:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,12 @@
}


def test_host_vs_guest_cpu_features(uvm_nano):
def test_host_vs_guest_cpu_features(uvm_plain_any):
"""Check CPU features host vs guest"""

vm = uvm_nano
vm = uvm_plain_any
vm.spawn()
vm.basic_config()
vm.add_net_iface()
vm.start()
host_feats = set(utils.check_output(CPU_FEATURES_CMD).stdout.split())
Expand Down Expand Up @@ -231,6 +233,10 @@ def test_host_vs_guest_cpu_features(uvm_nano):
assert host_feats - guest_feats == host_guest_diff_6_1
assert guest_feats - host_feats == INTEL_GUEST_ONLY_FEATS - {"umip"}

case CpuModel.INTEL_SAPPHIRE_RAPIDS:
assert host_feats - guest_feats == INTEL_HOST_ONLY_FEATS
assert guest_feats - host_feats == INTEL_GUEST_ONLY_FEATS

case CpuModel.ARM_NEOVERSE_N1:
expected_guest_minus_host = set()
expected_host_minus_guest = set()
Expand Down
5 changes: 5 additions & 0 deletions tests/integration_tests/performance/test_snapshot_ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import host_tools.drive as drive_tools
from framework.microvm import Microvm
from framework.properties import global_props

USEC_IN_MSEC = 1000
ITERATIONS = 30
Expand Down Expand Up @@ -124,6 +125,10 @@ def sample_latency(
],
ids=lambda x: x.id,
)
@pytest.mark.skipif(
global_props.cpu_codename == "INTEL_SAPPHIRE_RAPIDS",
reason="Intel Sapphire Rapids to be supported soon in upcoming change",
)
def test_restore_latency(
microvm_factory, rootfs, guest_kernel_linux_5_10, test_setup, metrics
):
Expand Down
9 changes: 9 additions & 0 deletions tools/bindgen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ fc-bindgen \
"amazonlinux-v5.10.y/include/uapi/linux/io_uring.h" \
>src/vmm/src/io_uring/gen.rs

# Kernel source tree used to generate the arch_prctl bindings; cloned on first use.
# NOTE(review): this section was labelled "Latest upstream kernel", but the URL below is
# the amazonlinux/linux repository — confirm which source tree is intended.
KERNEL_SRC_DIR="linux"
[ -d "${KERNEL_SRC_DIR}" ] || git clone --depth 1 https://github.com/amazonlinux/linux "${KERNEL_SRC_DIR}"

info "BINDGEN asm/prctl.h"
fc-bindgen \
    --allowlist-var "ARCH_.*" \
    "${KERNEL_SRC_DIR}/arch/x86/include/uapi/asm/prctl.h" >src/vmm/src/arch/x86_64/gen/arch_prctl.rs

# Apply any patches
info "Apply patches"
for PATCH in $(dirname $0)/bindgen-patches/*.patch; do
Expand Down