Commit 4e02d4f

Merge tag 'kvm-x86-svm-6.16' of https://github.com/kvm-x86/linux into HEAD
KVM SVM changes for 6.16:

 - Wait for the target vCPU to acknowledge KVM_REQ_UPDATE_PROTECTED_GUEST_STATE
   to fix a race between AP destroy and VMRUN.

 - Decrypt and dump the VMSA in dump_vmcb() if debugging is enabled for the VM.

 - Add support for ALLOWED_SEV_FEATURES.

 - Add #VMGEXIT to the set of handlers special-cased for CONFIG_RETPOLINE=y.

 - Treat DEBUGCTL[5:2] as reserved to pave the way for virtualizing features
   that utilize those bits.

 - Don't account temporary allocations in sev_send_update_data().

 - Add support for KVM_CAP_X86_BUS_LOCK_EXIT on SVM, via Bus Lock Threshold.
2 parents 3e89d5f + 72df72e commit 4e02d4f

File tree

14 files changed (+469, -31 lines)


Documentation/virt/kvm/api.rst

Lines changed: 5 additions & 0 deletions
@@ -8001,6 +8001,11 @@ apply some other policy-based mitigation. When exiting to userspace, KVM sets
 KVM_RUN_X86_BUS_LOCK in vcpu->run->flags, and conditionally sets the exit_reason
 to KVM_EXIT_X86_BUS_LOCK.
 
+Due to differences in the underlying hardware implementation, the vCPU's RIP at
+the time of exit diverges between Intel and AMD. On Intel hosts, RIP points at
+the next instruction, i.e. the exit is trap-like. On AMD hosts, RIP points at
+the offending instruction, i.e. the exit is fault-like.
+
 Note! Detected bus locks may be coincident with other exits to userspace, i.e.
 KVM_RUN_X86_BUS_LOCK should be checked regardless of the primary exit reason if
 userspace wants to take action on all detected bus locks.
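
For context, a minimal userspace sketch of how a VMM might consume these
semantics. The capability, flag, and ioctl names are from the existing KVM
uAPI; the helper names and the surrounding setup (vm_fd, the mmap'd kvm_run)
are hypothetical:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdio.h>

	/* Hypothetical helper; vm_fd comes from the usual KVM_CREATE_VM
	 * boilerplate, which is assumed here. */
	static int enable_bus_lock_exits(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_X86_BUS_LOCK_EXIT,
			.args[0] = KVM_BUS_LOCK_DETECTION_EXIT,
		};

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

	/* 'run' is the vCPU's mmap'd struct kvm_run. */
	static void handle_vcpu_exit(struct kvm_run *run)
	{
		/* Check the flag on every exit, not just on
		 * KVM_EXIT_X86_BUS_LOCK, since a bus lock can be coincident
		 * with another exit reason (see the doc text above). */
		if (run->flags & KVM_RUN_X86_BUS_LOCK) {
			/* Per the new paragraph: on Intel, RIP is already past
			 * the offending instruction (trap-like); on AMD it
			 * still points at it (fault-like). Don't assume
			 * either when logging or throttling. */
			fprintf(stderr, "bus lock detected\n");
		}
	}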

arch/x86/include/asm/cpufeatures.h

Lines changed: 2 additions & 0 deletions
@@ -379,6 +379,7 @@
 #define X86_FEATURE_V_SPEC_CTRL		(15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
 #define X86_FEATURE_VNMI			(15*32+25) /* "vnmi" Virtual NMI */
 #define X86_FEATURE_SVME_ADDR_CHK		(15*32+28) /* SVME addr check */
+#define X86_FEATURE_BUS_LOCK_THRESHOLD		(15*32+29) /* Bus lock threshold */
 #define X86_FEATURE_IDLE_HLT			(15*32+30) /* IDLE HLT intercept */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -447,6 +448,7 @@
 #define X86_FEATURE_DEBUG_SWAP			(19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
 #define X86_FEATURE_RMPREAD			(19*32+21) /* RMPREAD instruction */
 #define X86_FEATURE_SEGMENTED_RMP		(19*32+23) /* Segmented RMP support */
+#define X86_FEATURE_ALLOWED_SEV_FEATURES	(19*32+27) /* Allowed SEV Features */
 #define X86_FEATURE_SVSM			(19*32+28) /* "svsm" SVSM present */
 #define X86_FEATURE_HV_INUSE_WR_ALLOWED		(19*32+30) /* Allow Write to in-use hypervisor-owned pages */

arch/x86/include/asm/kvm_host.h

Lines changed: 3 additions & 2 deletions
@@ -125,7 +125,8 @@
 	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE	KVM_ARCH_REQ(34)
+#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \
+	KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT)
 
 #define CR0_RESERVED_BITS \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -411,7 +412,6 @@ struct kvm_rmap_head {
 };
 
 struct kvm_pio_request {
-	unsigned long linear_rip;
 	unsigned long count;
 	int in;
 	int port;
@@ -917,6 +917,7 @@ struct kvm_vcpu_arch {
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
+	unsigned long cui_linear_rip;
 
 	gpa_t time;
 	s8 pvclock_tsc_shift;
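
Here kvm_pio_request.linear_rip is generalized into vcpu->arch.cui_linear_rip
so that any complete_userspace_io() callback can use it. The SVM bus-lock exit
handler that consumes it lives in svm.c and is not part of the hunks shown; the
following is a hedged reconstruction from the fields and helpers this series
touches (kvm_get_linear_rip() and kvm_is_linear_rip() are existing KVM
helpers), not the literal merged code:

	/* Sketch only: a plausible shape for the svm.c handler. */
	static int complete_userspace_buslock(struct kvm_vcpu *vcpu)
	{
		struct vcpu_svm *svm = to_svm(vcpu);

		/*
		 * If the vCPU is still at the bus-locking RIP after userspace
		 * has had its say, remember that RIP so nested VMRUN (see the
		 * nested.c hunk below) keeps the one-shot counter armed for
		 * that same instruction.
		 */
		if (is_guest_mode(vcpu) &&
		    kvm_is_linear_rip(vcpu, vcpu->arch.cui_linear_rip))
			svm->nested.ctl.bus_lock_rip = vcpu->arch.cui_linear_rip;

		return 1;	/* resume the guest */
	}

	static int bus_lock_exit(struct kvm_vcpu *vcpu)
	{
		vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
		vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;

		vcpu->arch.cui_linear_rip = kvm_get_linear_rip(vcpu);
		vcpu->arch.complete_userspace_io = complete_userspace_buslock;

		return 0;	/* exit to userspace */
	}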

arch/x86/include/asm/svm.h

Lines changed: 9 additions & 1 deletion
@@ -116,6 +116,7 @@ enum {
 	INTERCEPT_INVPCID,
 	INTERCEPT_MCOMMIT,
 	INTERCEPT_TLBSYNC,
+	INTERCEPT_BUSLOCK,
 	INTERCEPT_IDLE_HLT = 166,
 };
 
@@ -159,7 +160,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u64 avic_physical_id;	/* Offset 0xf8 */
 	u8 reserved_7[8];
 	u64 vmsa_pa;		/* Used for an SEV-ES guest */
-	u8 reserved_8[720];
+	u8 reserved_8[16];
+	u16 bus_lock_counter;		/* Offset 0x120 */
+	u8 reserved_9[22];
+	u64 allowed_sev_features;	/* Offset 0x138 */
+	u64 guest_sev_features;		/* Offset 0x140 */
+	u8 reserved_10[664];
 	/*
 	 * Offset 0x3e0, 32 bytes reserved
 	 * for use by hypervisor/software.
@@ -291,6 +297,8 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
 #define SVM_SEV_FEAT_ALTERNATE_INJECTION	BIT(4)
 #define SVM_SEV_FEAT_DEBUG_SWAP			BIT(5)
 
+#define VMCB_ALLOWED_SEV_FEATURES_VALID		BIT_ULL(63)
+
 struct vmcb_seg {
 	u16 selector;
 	u16 attrib;
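
Since the old reserved_8[720] blob is now split around the new fields, a quick
way to sanity-check the layout is to assert the offsets documented in the
comments above. This is an illustrative sketch, not a check the patch itself
adds:

	#include <linux/build_bug.h>
	#include <linux/stddef.h>

	/* Offsets come straight from the vmcb_control_area comments. */
	static_assert(offsetof(struct vmcb_control_area, bus_lock_counter) == 0x120);
	static_assert(offsetof(struct vmcb_control_area, allowed_sev_features) == 0x138);
	static_assert(offsetof(struct vmcb_control_area, guest_sev_features) == 0x140);
	/* reserved_10 must still end at the 0x3e0 hypervisor-software area. */
	static_assert(offsetof(struct vmcb_control_area, reserved_10) + 664 == 0x3e0);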

arch/x86/include/uapi/asm/svm.h

Lines changed: 2 additions & 0 deletions
@@ -95,6 +95,7 @@
 #define SVM_EXIT_CR14_WRITE_TRAP	0x09e
 #define SVM_EXIT_CR15_WRITE_TRAP	0x09f
 #define SVM_EXIT_INVPCID		0x0a2
+#define SVM_EXIT_BUS_LOCK		0x0a5
 #define SVM_EXIT_IDLE_HLT		0x0a6
 #define SVM_EXIT_NPF			0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI	0x401
@@ -225,6 +226,7 @@
 	{ SVM_EXIT_CR4_WRITE_TRAP,	"write_cr4_trap" }, \
 	{ SVM_EXIT_CR8_WRITE_TRAP,	"write_cr8_trap" }, \
 	{ SVM_EXIT_INVPCID,		"invpcid" }, \
+	{ SVM_EXIT_BUS_LOCK,		"buslock" }, \
 	{ SVM_EXIT_IDLE_HLT,		"idle-halt" }, \
 	{ SVM_EXIT_NPF,			"npf" }, \
 	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,	"avic_incomplete_ipi" }, \

arch/x86/kvm/svm/nested.c

Lines changed: 34 additions & 0 deletions
@@ -678,6 +678,33 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
 	vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
 
+	/*
+	 * Stash vmcb02's counter if the guest hasn't moved past the guilty
+	 * instruction; otherwise, reset the counter to '0'.
+	 *
+	 * In order to detect if L2 has made forward progress or not, track the
+	 * RIP at which a bus lock has occurred on a per-vmcb12 basis. If RIP
+	 * has changed, the guest has clearly made forward progress, so reset
+	 * bus_lock_counter to '0'. E.g. in the scenario where a bus lock
+	 * happened in L1 before VMRUN, the bus lock firmly happened on an
+	 * instruction in the past. Even if vmcb01's counter is still '1'
+	 * (because the guilty instruction got patched), the vCPU has clearly
+	 * made forward progress and so KVM should reset vmcb02's counter to
+	 * '0'.
+	 *
+	 * If the RIP hasn't changed, stash the bus lock counter at nested VMRUN
+	 * to prevent the same guilty instruction from triggering a VM-Exit. E.g.
+	 * if userspace rate-limits the vCPU, then it's entirely possible that
+	 * L1's tick interrupt is pending by the time userspace re-runs the
+	 * vCPU. If KVM unconditionally clears the counter on VMRUN, then when
+	 * L1 re-enters L2, the same instruction will trigger a VM-Exit and the
+	 * entire cycle starts over.
+	 */
+	if (vmcb02->save.rip && (svm->nested.ctl.bus_lock_rip == vmcb02->save.rip))
+		vmcb02->control.bus_lock_counter = 1;
+	else
+		vmcb02->control.bus_lock_counter = 0;
+
 	/* Done at vmrun: asid. */
 
 	/* Also overwritten later if necessary. */
@@ -1039,6 +1066,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	}
 
+	/*
+	 * Invalidate bus_lock_rip unless KVM is still waiting for the guest
+	 * to make forward progress before re-enabling bus lock detection.
+	 */
+	if (!vmcb02->control.bus_lock_counter)
+		svm->nested.ctl.bus_lock_rip = INVALID_GPA;
+
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 	kvm_nested_vmexit_handle_ibrs(vcpu);

arch/x86/kvm/svm/sev.c

Lines changed: 107 additions & 6 deletions
@@ -560,6 +560,8 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
 		return -EFAULT;
 
+	sev->policy = params.policy;
+
 	memset(&start, 0, sizeof(start));
 
 	dh_blob = NULL;
@@ -1592,11 +1594,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 	/* allocate memory for header and transport buffer */
 	ret = -ENOMEM;
-	hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+	hdr = kzalloc(params.hdr_len, GFP_KERNEL);
 	if (!hdr)
 		goto e_unpin;
 
-	trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+	trans_data = kzalloc(params.trans_len, GFP_KERNEL);
 	if (!trans_data)
 		goto e_free_hdr;
 
@@ -2199,6 +2201,8 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
 		return -EINVAL;
 
+	sev->policy = params.policy;
+
 	sev->snp_context = snp_context_create(kvm, argp);
 	if (!sev->snp_context)
 		return -ENOTTY;
@@ -3994,10 +3998,8 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
 	 * Unless Creation is deferred until INIT, signal the vCPU to update
 	 * its state.
 	 */
-	if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT) {
-		kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
-		kvm_vcpu_kick(target_vcpu);
-	}
+	if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT)
+		kvm_make_request_and_kick(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
 
 	return 0;
 }
@@ -4455,6 +4457,7 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
 
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
+	struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
 	struct vmcb *vmcb = svm->vmcb01.ptr;
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
@@ -4470,6 +4473,10 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
 		svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
 
+	if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
+		svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
+							  VMCB_ALLOWED_SEV_FEATURES_VALID;
+
 	/* Can't intercept CR register access, HV can't modify CR registers */
 	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
 	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
@@ -4930,3 +4937,97 @@ int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
 
 	return level;
 }
+
+struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_save_area *vmsa;
+	struct kvm_sev_info *sev;
+	int error = 0;
+	int ret;
+
+	if (!sev_es_guest(vcpu->kvm))
+		return NULL;
+
+	/*
+	 * If the VMSA has not yet been encrypted, return a pointer to the
+	 * current un-encrypted VMSA.
+	 */
+	if (!vcpu->arch.guest_state_protected)
+		return (struct vmcb_save_area *)svm->sev_es.vmsa;
+
+	sev = to_kvm_sev_info(vcpu->kvm);
+
+	/* Check if the SEV policy allows debugging */
+	if (sev_snp_guest(vcpu->kvm)) {
+		if (!(sev->policy & SNP_POLICY_DEBUG))
+			return NULL;
+	} else {
+		if (sev->policy & SEV_POLICY_NODBG)
+			return NULL;
+	}
+
+	if (sev_snp_guest(vcpu->kvm)) {
+		struct sev_data_snp_dbg dbg = {0};
+
+		vmsa = snp_alloc_firmware_page(__GFP_ZERO);
+		if (!vmsa)
+			return NULL;
+
+		dbg.gctx_paddr = __psp_pa(sev->snp_context);
+		dbg.src_addr = svm->vmcb->control.vmsa_pa;
+		dbg.dst_addr = __psp_pa(vmsa);
+
+		ret = sev_do_cmd(SEV_CMD_SNP_DBG_DECRYPT, &dbg, &error);
+
+		/*
+		 * Return the target page to a hypervisor page no matter what.
+		 * If this fails, the page can't be used, so leak it and don't
+		 * try to use it.
+		 */
+		if (snp_page_reclaim(vcpu->kvm, PHYS_PFN(__pa(vmsa))))
+			return NULL;
+
+		if (ret) {
+			pr_err("SEV: SNP_DBG_DECRYPT failed ret=%d, fw_error=%d (%#x)\n",
+			       ret, error, error);
+			free_page((unsigned long)vmsa);
+
+			return NULL;
+		}
+	} else {
+		struct sev_data_dbg dbg = {0};
+		struct page *vmsa_page;
+
+		vmsa_page = alloc_page(GFP_KERNEL);
+		if (!vmsa_page)
+			return NULL;
+
+		vmsa = page_address(vmsa_page);
+
+		dbg.handle = sev->handle;
+		dbg.src_addr = svm->vmcb->control.vmsa_pa;
+		dbg.dst_addr = __psp_pa(vmsa);
+		dbg.len = PAGE_SIZE;
+
+		ret = sev_do_cmd(SEV_CMD_DBG_DECRYPT, &dbg, &error);
+		if (ret) {
+			pr_err("SEV: SEV_CMD_DBG_DECRYPT failed ret=%d, fw_error=%d (0x%x)\n",
+			       ret, error, error);
+			__free_page(vmsa_page);
+
+			return NULL;
+		}
+	}
+
+	return vmsa;
+}
+
+void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa)
+{
+	/* If the VMSA has not yet been encrypted, nothing was allocated */
+	if (!vcpu->arch.guest_state_protected || !vmsa)
+		return;
+
+	free_page((unsigned long)vmsa);
+}
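
Per the merge summary, these two helpers are consumed by dump_vmcb() ("Decrypt
and dump the VMSA ... if debugging enabled for the VM"); the dump_vmcb()
changes themselves are in svm.c and not shown above. A plausible caller
pattern, sketched from the signatures alone (dump_vmsa() is a hypothetical
name), would be:

	/* Sketch of the caller side, not the merged dump_vmcb() hunk. */
	static void dump_vmsa(struct kvm_vcpu *vcpu)
	{
		struct vmcb_save_area *vmsa;

		/* Returns NULL for non-SEV-ES guests and for guests whose
		 * policy forbids debugging; before the guest state is
		 * protected it returns the live, still-unencrypted VMSA
		 * without allocating anything. */
		vmsa = sev_decrypt_vmsa(vcpu);
		if (!vmsa)
			return;

		pr_err("VMSA: rip=%#llx rsp=%#llx\n", vmsa->rip, vmsa->rsp);

		/* Frees the decrypted copy only if one was allocated. */
		sev_free_decrypted_vmsa(vcpu, vmsa);
	}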
