@@ -6,7 +6,11 @@ use kvm_bindings::KVM_API_VERSION;
66use kvm_bindings:: { CpuId , MsrList , KVM_MAX_CPUID_ENTRIES } ;
77use kvm_ioctls:: Kvm as KvmFd ;
88use serde:: { Deserialize , Serialize } ;
9+ #[ cfg( target_arch = "x86_64" ) ]
10+ use vmm_sys_util:: syscall:: SyscallReturnCode ;
911
12+ #[ cfg( target_arch = "x86_64" ) ]
13+ use crate :: arch:: x86_64:: gen:: arch_prctl;
1014use crate :: cpu_config:: templates:: KvmCapability ;
1115use crate :: vstate:: memory:: { GuestMemory , GuestMemoryMmap } ;
1216
@@ -25,8 +29,14 @@ pub enum KvmError {
2529 #[ cfg( target_arch = "x86_64" ) ]
2630 /// Failed to get supported cpuid: {0}
2731 GetSupportedCpuId ( kvm_ioctls:: Error ) ,
32+ #[ cfg( target_arch = "x86_64" ) ]
33+ /// Failed to get supported XSTATE features: {0}
34+ GetSupportedXstateFeatures ( std:: io:: Error ) ,
2835 /// The number of configured slots is bigger than the maximum reported by KVM
2936 NotEnoughMemorySlots ,
37+ #[ cfg( target_arch = "x86_64" ) ]
38+ /// Failed to enable XSTATE features ({0:#b}): {1}
39+ RequestXstateFeatures ( u32 , std:: io:: Error ) ,
3040}
3141
3242/// Struct with kvm fd and kvm associated parameters.
@@ -73,6 +83,8 @@ impl Kvm {
7383
7484 #[ cfg( target_arch = "x86_64" ) ]
7585 {
86+ Self :: enable_intel_amx ( ) ?;
87+
7688 let supported_cpuid = kvm_fd
7789 . get_supported_cpuid ( KVM_MAX_CPUID_ENTRIES )
7890 . map_err ( KvmError :: GetSupportedCpuId ) ?;
@@ -86,6 +98,74 @@ impl Kvm {
8698 }
8799 }
88100
101+ #[ cfg( target_arch = "x86_64" ) ]
102+ // XSTATE feature mask for Intel AMX.
103+ const INTEL_AMX_XCOMP_MASK : libc:: c_ulong =
104+ ( 1u64 << arch_prctl:: ARCH_XCOMP_TILECFG ) | ( 1u64 << arch_prctl:: ARCH_XCOMP_TILEDATA ) ;
105+
106+ /// Enable Intel AMX if available.
107+ ///
108+ /// Intel AMX (Advanced Matrix Extensions) is an instruction set for AI workloads that was
109+ /// introduced in Intel Sapphire Rapids (*7i.metal). Since it requires larger area to save the
110+ /// state, it is disabled by default.
111+ /// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst
112+ ///
113+ /// We enable it by default but can be disabled by CPU template; otherwise,
114+ /// KVM_GET_SUPPORTED_CPUID returns a inconsistent state where TILECFG is enabled but TILEDATA
115+ /// is disabled, causing guest's #GP fault on xsetbv due to the lack of sanity check.
116+ /// https://lore.kernel.org/all/[email protected] / 117+ ///
118+ /// Dynamically-enabled feature bits need to be requested with arch_prctl() before calling
119+ /// KVM_GET_SUPPORTED_CPUID. Feature bits that have not been requested are excluded from the
120+ /// result of KVM_GET_SUPPORTED_CPUID.
121+ /// https://docs.kernel.org/virt/kvm/api.html
122+ ///
123+ /// Note that no memory allocation to save Intel AMX state happens here immediately.
124+ #[ cfg( target_arch = "x86_64" ) ]
125+ fn enable_intel_amx ( ) -> Result < ( ) , KvmError > {
126+ // Get the supported xstate features.
127+ let mut supported_xfeatures: libc:: c_ulong = 0 ;
128+ // SAFETY: Safe because the second input (`op`) might not be valid for unsupported kernels
129+ // but EINVAL is handled later, and the third input (`addr`) is a valid c_ulong pointer.
130+ // https://man7.org/linux/man-pages/man2/arch_prctl.2.html
131+ SyscallReturnCode ( unsafe {
132+ libc:: syscall (
133+ libc:: SYS_arch_prctl ,
134+ arch_prctl:: ARCH_GET_XCOMP_SUPP ,
135+ & mut supported_xfeatures as * mut libc:: c_ulong ,
136+ )
137+ } )
138+ . into_empty_result ( )
139+ . or_else ( |err| {
140+ // EINVAL is returned if ARCH_GET_XCOMP_SUPP is not supported (e.g. kernel versions
141+ // prior to v5.17).
142+ // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
143+ if err. raw_os_error ( ) == Some ( libc:: EINVAL ) {
144+ Ok ( ( ) )
145+ } else {
146+ Err ( err)
147+ }
148+ } )
149+ . map_err ( KvmError :: GetSupportedXstateFeatures ) ?;
150+
151+ // Enable Intel AMX if supported.
152+ if ( supported_xfeatures & Self :: INTEL_AMX_XCOMP_MASK ) == Self :: INTEL_AMX_XCOMP_MASK {
153+ // SAFETY: Safe because ARCH_REQ_XCOMP_GUEST_PERM is supported if ARCH_GET_XCOMP_SUPP is
154+ // supported and it has been confirmed that ARCH_XCOMP_TILEDATA is supported.
155+ SyscallReturnCode ( unsafe {
156+ libc:: syscall (
157+ libc:: SYS_arch_prctl ,
158+ arch_prctl:: ARCH_REQ_XCOMP_GUEST_PERM ,
159+ arch_prctl:: ARCH_XCOMP_TILEDATA ,
160+ )
161+ } )
162+ . into_empty_result ( )
163+ . map_err ( |err| KvmError :: RequestXstateFeatures ( arch_prctl:: ARCH_XCOMP_TILEDATA , err) ) ?;
164+ }
165+
166+ Ok ( ( ) )
167+ }
168+
89169 /// Msrs needed to be saved on snapshot creation.
90170 #[ cfg( target_arch = "x86_64" ) ]
91171 pub fn msrs_to_save ( & self ) -> Result < MsrList , crate :: arch:: x86_64:: msr:: MsrError > {
@@ -215,4 +295,99 @@ pub(crate) mod tests {
215295 . iter( )
216296 . any( |c| * c == kvm_bindings:: KVM_CAP_IOEVENTFD ) ) ;
217297 }
298+
299+ #[ cfg( target_arch = "x86_64" ) ]
300+ mod x86_64 {
301+ use super :: * ;
302+ use crate :: arch:: x86_64:: cpu_model:: CpuModel ;
303+
/// `(major, minor)` version of a Linux kernel.
///
/// The derived ordering compares the major number first and the minor number second.
#[derive(PartialEq, PartialOrd)]
struct KernelVersion(u32, u32);

impl KernelVersion {
    /// Reads the running kernel's version from `/proc/sys/kernel/osrelease`.
    ///
    /// Only the first two dot-separated components are parsed. Panics if the file cannot be
    /// read or if either component is missing or not a plain unsigned integer.
    fn current() -> Self {
        let release = std::fs::read_to_string("/proc/sys/kernel/osrelease").unwrap();
        // The iterator is lazy, so only the components actually pulled below get parsed.
        let mut numbers = release
            .trim()
            .split('.')
            .map(|component| component.parse::<u32>().unwrap());

        let major = numbers.next().unwrap();
        let minor = numbers.next().unwrap();

        Self(major, minor)
    }
}
318+
/// CPU vendors distinguished by the tests in this module.
#[derive(PartialEq)]
enum Vendor {
    Intel,
    Amd,
}

impl Vendor {
    /// Detects the host CPU vendor from `/proc/cpuinfo`.
    ///
    /// Panics if the vendor string is neither `GenuineIntel` nor `AuthenticAMD`.
    fn new() -> Self {
        let vendor_id = Self::get_vendor_id_str();
        if vendor_id == "GenuineIntel" {
            Vendor::Intel
        } else if vendor_id == "AuthenticAMD" {
            Vendor::Amd
        } else {
            panic!("Unknown vendor_id: {}", vendor_id)
        }
    }

    /// Returns the trimmed value of the first `vendor_id` line in `/proc/cpuinfo`.
    ///
    /// Panics if the file cannot be read, if no line starts with `vendor_id`, or if that line
    /// has no `:` separator.
    fn get_vendor_id_str() -> String {
        let cpuinfo = std::fs::read_to_string("/proc/cpuinfo").unwrap();

        let entry = cpuinfo
            .lines()
            .find(|line| line.starts_with("vendor_id"))
            .unwrap_or_else(|| panic!("`vendor_id` not found in /proc/cpuinfo"));

        // The value is whatever sits between the first and the second `:` on the line.
        let value = entry.split(':').nth(1).unwrap();
        value.trim().to_string()
    }
}
350+
/// Checks that `Kvm::enable_intel_amx()` succeeds and that, on kernels exposing the XCOMP
/// guest-permission interface, the permitted features match what the host CPU is expected to
/// offer.
#[cfg(target_arch = "x86_64")]
#[test]
fn test_enable_intel_amx() {
    Kvm::enable_intel_amx().unwrap();

    // ARCH_{REQ,GET}_XCOMP_GUEST_PERM were added in kernel v5.17, so there is nothing further
    // to verify on older kernels.
    // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1
    if KernelVersion::current() < KernelVersion(5, 17) {
        return;
    }

    let mut guest_perm: libc::c_ulong = 0;
    // SAFETY: The `op` argument should be valid on kernel v5.17+, and the `addr` argument
    // points to a valid `c_ulong`.
    let ret = unsafe {
        libc::syscall(
            libc::SYS_arch_prctl,
            arch_prctl::ARCH_GET_XCOMP_GUEST_PERM,
            &mut guest_perm as *mut libc::c_ulong,
        )
    };
    SyscallReturnCode(ret).into_empty_result().unwrap();

    // Intel AMX is available only on Intel processors now.
    let is_intel = Vendor::new() == Vendor::Intel;

    // Intel AMX is introduced in Intel Sapphire Rapids (CPUID.01H:EAX = 0x000806f8).
    let first_amx_model = CpuModel::from(&0x000806f8);
    let has_amx_model = CpuModel::get_cpu_model() >= first_amx_model;

    // Either both AMX bits are permitted (supported host) or neither is.
    let expected_bits = if has_amx_model && is_intel {
        Kvm::INTEL_AMX_XCOMP_MASK
    } else {
        0
    };
    assert_eq!(guest_perm & Kvm::INTEL_AMX_XCOMP_MASK, expected_bits);
}
392+ }
218393}
0 commit comments