@@ -19,6 +19,7 @@ use serde::{Deserialize, Serialize};
19
19
use crate :: arch:: x86_64:: interrupts;
20
20
use crate :: arch:: x86_64:: msr:: { create_boot_msr_entries, MsrError } ;
21
21
use crate :: arch:: x86_64:: regs:: { SetupFpuError , SetupRegistersError , SetupSpecialRegistersError } ;
22
+ use crate :: arch_gen:: x86:: msr_index:: { MSR_IA32_TSC , MSR_IA32_TSC_DEADLINE } ;
22
23
use crate :: cpu_config:: x86_64:: { cpuid, CpuConfiguration } ;
23
24
use crate :: logger:: { IncMetric , METRICS } ;
24
25
use crate :: vstate:: memory:: { Address , GuestAddress , GuestMemoryMmap } ;
@@ -282,6 +283,39 @@ impl KvmVcpu {
282
283
Ok ( cpuid)
283
284
}
284
285
286
+ /// If the IA32_TSC_DEADLINE MSR value is zero, update it
287
+ /// with the IA32_TSC value to guarantee that
288
+ /// the vCPU will continue receiving interrupts after restoring from a snapshot.
289
+ ///
290
+ /// Rationale: we observed that sometimes when taking a snapshot,
291
+ /// the IA32_TSC_DEADLINE MSR is cleared, but the interrupt is not
292
+ /// delivered to the guest, leading to a situation where one
293
+ /// of the vCPUs never receives TSC interrupts after restoring,
294
+ /// until the MSR is updated externally, eg by setting the system time.
295
+ fn fix_zero_tsc_deadline_msr ( msr_chunks : & mut [ Msrs ] ) {
296
+ // We do not expect more than 1 TSC MSR entry, but if there are multiple, pick the maximum.
297
+ let max_tsc_value = msr_chunks
298
+ . iter ( )
299
+ . flat_map ( |msrs| msrs. as_slice ( ) )
300
+ . filter ( |msr| msr. index == MSR_IA32_TSC )
301
+ . map ( |msr| msr. data )
302
+ . max ( ) ;
303
+
304
+ if let Some ( tsc_value) = max_tsc_value {
305
+ msr_chunks
306
+ . iter_mut ( )
307
+ . flat_map ( |msrs| msrs. as_mut_slice ( ) )
308
+ . filter ( |msr| msr. index == MSR_IA32_TSC_DEADLINE && msr. data == 0 )
309
+ . for_each ( |msr| {
310
+ warn ! (
311
+ "MSR_IA32_TSC_DEADLINE is 0, replacing with {:x}." ,
312
+ tsc_value
313
+ ) ;
314
+ msr. data = tsc_value;
315
+ } ) ;
316
+ }
317
+ }
318
+
285
319
/// Get MSR chunks for the given MSR index list.
286
320
///
287
321
/// KVM only supports getting `KVM_MAX_MSR_ENTRIES` at a time, so we divide
@@ -321,6 +355,8 @@ impl KvmVcpu {
321
355
msr_chunks. push ( msrs) ;
322
356
}
323
357
358
+ Self :: fix_zero_tsc_deadline_msr ( & mut msr_chunks) ;
359
+
324
360
Ok ( msr_chunks)
325
361
}
326
362
@@ -594,6 +630,7 @@ mod tests {
594
630
595
631
use std:: os:: unix:: io:: AsRawFd ;
596
632
633
+ use kvm_bindings:: kvm_msr_entry;
597
634
use kvm_ioctls:: Cap ;
598
635
599
636
use super :: * ;
@@ -949,4 +986,77 @@ mod tests {
949
986
}
950
987
}
951
988
}
989
+
990
+ fn msrs_from_entries ( msr_entries : & [ ( u32 , u64 ) ] ) -> Msrs {
991
+ Msrs :: from_entries (
992
+ & msr_entries
993
+ . iter ( )
994
+ . map ( |& ( index, data) | kvm_msr_entry {
995
+ index,
996
+ data,
997
+ ..Default :: default ( )
998
+ } )
999
+ . collect :: < Vec < _ > > ( ) ,
1000
+ )
1001
+ . unwrap ( )
1002
+ }
1003
+
1004
+ fn assert_msrs ( msr_chunks : & [ Msrs ] , expected_msr_entries : & [ ( u32 , u64 ) ] ) {
1005
+ let flattened_msrs = msr_chunks. iter ( ) . flat_map ( |msrs| msrs. as_slice ( ) ) ;
1006
+ for ( a, b) in flattened_msrs. zip ( expected_msr_entries. iter ( ) ) {
1007
+ assert_eq ! ( a. index, b. 0 ) ;
1008
+ assert_eq ! ( a. data, b. 1 ) ;
1009
+ }
1010
+ }
1011
+
1012
#[test]
fn test_fix_zero_tsc_deadline_msr_zero_same_chunk() {
    // Both the TSC and the TSC_DEADLINE entries live in a single chunk.
    let mut chunks =
        [msrs_from_entries(&[(MSR_IA32_TSC_DEADLINE, 0), (MSR_IA32_TSC, 42)])];

    KvmVcpu::fix_zero_tsc_deadline_msr(&mut chunks);

    // The zeroed MSR_IA32_TSC_DEADLINE must be overwritten with the MSR_IA32_TSC value.
    assert_msrs(&chunks, &[(MSR_IA32_TSC_DEADLINE, 42), (MSR_IA32_TSC, 42)]);
}
1028
+
1029
#[test]
fn test_fix_zero_tsc_deadline_msr_zero_separate_chunks() {
    // The TSC and TSC_DEADLINE entries are split across two chunks; the fix
    // must still locate the TSC value from the other chunk.
    let mut chunks = [
        msrs_from_entries(&[(MSR_IA32_TSC_DEADLINE, 0)]),
        msrs_from_entries(&[(MSR_IA32_TSC, 42)]),
    ];

    KvmVcpu::fix_zero_tsc_deadline_msr(&mut chunks);

    // The zeroed MSR_IA32_TSC_DEADLINE must be overwritten with the MSR_IA32_TSC value.
    assert_msrs(&chunks, &[(MSR_IA32_TSC_DEADLINE, 42), (MSR_IA32_TSC, 42)]);
}
1045
+
1046
#[test]
fn test_fix_zero_tsc_deadline_msr_non_zero() {
    let mut chunks =
        [msrs_from_entries(&[(MSR_IA32_TSC_DEADLINE, 1), (MSR_IA32_TSC, 2)])];

    KvmVcpu::fix_zero_tsc_deadline_msr(&mut chunks);

    // A non-zero MSR_IA32_TSC_DEADLINE is already valid and must be left untouched.
    assert_msrs(&chunks, &[(MSR_IA32_TSC_DEADLINE, 1), (MSR_IA32_TSC, 2)]);
}
952
1062
}
0 commit comments