Skip to content

Commit 54a1a24

Browse files
committed
KVM: x86: Unify cross-vCPU IBPB
Both SVM and VMX have similar implementation for executing an IBPB between running different vCPUs on the same CPU to create separate prediction domains for different vCPUs. For VMX, when the currently loaded VMCS is changed in vmx_vcpu_load_vmcs(), an IBPB is executed if there is no 'buddy', which is the case on vCPU load. The intention is to execute an IBPB when switching vCPUs, but not when switching the VMCS within the same vCPU. Executing an IBPB on nested transitions within the same vCPU is handled separately and conditionally in nested_vmx_vmexit(). For SVM, the current VMCB is tracked on vCPU load and an IBPB is executed when it is changed. The intention is also to execute an IBPB when switching vCPUs, although it is possible that in some cases an IBPB is executed when switching VMCBs for the same vCPU. Executing an IBPB on nested transitions should be handled separately, and is proposed at [1]. Unify the logic by tracking the last loaded vCPU and executing the IBPB on vCPU change in kvm_arch_vcpu_load() instead. When a vCPU is destroyed, make sure all references to it are removed from any CPU. This is similar to how SVM clears the current_vmcb tracking on vCPU destruction. Remove the current VMCB tracking in SVM as it is no longer required, as well as the 'buddy' parameter to vmx_vcpu_load_vmcs(). [1] https://lore.kernel.org/lkml/[email protected] Link: https://lore.kernel.org/all/[email protected] Signed-off-by: Yosry Ahmed <[email protected]> [sean: tweak comment to stay at/under 80 columns] Signed-off-by: Sean Christopherson <[email protected]>
1 parent 1bee483 commit 54a1a24

File tree

6 files changed

+25
-47
lines changed

6 files changed

+25
-47
lines changed

arch/x86/kvm/svm/svm.c

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,25 +1492,10 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
14921492
return err;
14931493
}
14941494

1495-
static void svm_clear_current_vmcb(struct vmcb *vmcb)
1496-
{
1497-
int i;
1498-
1499-
for_each_possible_cpu(i)
1500-
cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
1501-
}
1502-
15031495
static void svm_vcpu_free(struct kvm_vcpu *vcpu)
15041496
{
15051497
struct vcpu_svm *svm = to_svm(vcpu);
15061498

1507-
/*
1508-
* The vmcb page can be recycled, causing a false negative in
1509-
* svm_vcpu_load(). So, ensure that no logical CPU has this
1510-
* vmcb page recorded as its current vmcb.
1511-
*/
1512-
svm_clear_current_vmcb(svm->vmcb);
1513-
15141499
svm_leave_nested(vcpu);
15151500
svm_free_nested(svm);
15161501

@@ -1562,19 +1547,9 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
15621547

15631548
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
15641549
{
1565-
struct vcpu_svm *svm = to_svm(vcpu);
1566-
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
1567-
15681550
if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
15691551
shrink_ple_window(vcpu);
15701552

1571-
if (sd->current_vmcb != svm->vmcb) {
1572-
sd->current_vmcb = svm->vmcb;
1573-
1574-
if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT) &&
1575-
static_branch_likely(&switch_vcpu_ibpb))
1576-
indirect_branch_prediction_barrier();
1577-
}
15781553
if (kvm_vcpu_apicv_active(vcpu))
15791554
avic_vcpu_load(vcpu, cpu);
15801555
}

arch/x86/kvm/svm/svm.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,6 @@ struct svm_cpu_data {
338338
struct vmcb *save_area;
339339
unsigned long save_area_pa;
340340

341-
struct vmcb *current_vmcb;
342-
343341
/* index = sev_asid, value = vmcb pointer */
344342
struct vmcb **sev_vmcbs;
345343
};

arch/x86/kvm/vmx/nested.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
301301
cpu = get_cpu();
302302
prev = vmx->loaded_vmcs;
303303
vmx->loaded_vmcs = vmcs;
304-
vmx_vcpu_load_vmcs(vcpu, cpu, prev);
304+
vmx_vcpu_load_vmcs(vcpu, cpu);
305305
vmx_sync_vmcs_host_state(vmx, prev);
306306
put_cpu();
307307

@@ -4520,12 +4520,12 @@ static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
45204520

45214521
cpu = get_cpu();
45224522
vmx->loaded_vmcs = &vmx->nested.vmcs02;
4523-
vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
4523+
vmx_vcpu_load_vmcs(vcpu, cpu);
45244524

45254525
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
45264526

45274527
vmx->loaded_vmcs = &vmx->vmcs01;
4528-
vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
4528+
vmx_vcpu_load_vmcs(vcpu, cpu);
45294529
put_cpu();
45304530
}
45314531

arch/x86/kvm/vmx/vmx.c

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,8 +1445,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
14451445
}
14461446
}
14471447

1448-
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
1449-
struct loaded_vmcs *buddy)
1448+
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
14501449
{
14511450
struct vcpu_vmx *vmx = to_vmx(vcpu);
14521451
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
@@ -1473,17 +1472,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
14731472
if (prev != vmx->loaded_vmcs->vmcs) {
14741473
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
14751474
vmcs_load(vmx->loaded_vmcs->vmcs);
1476-
1477-
/*
1478-
* No indirect branch prediction barrier needed when switching
1479-
* the active VMCS within a vCPU, unless IBRS is advertised to
1480-
* the vCPU. To minimize the number of IBPBs executed, KVM
1481-
* performs IBPB on nested VM-Exit (a single nested transition
1482-
* may switch the active VMCS multiple times).
1483-
*/
1484-
if (static_branch_likely(&switch_vcpu_ibpb) &&
1485-
(!buddy || WARN_ON_ONCE(buddy->vmcs != prev)))
1486-
indirect_branch_prediction_barrier();
14871475
}
14881476

14891477
if (!already_loaded) {
@@ -1522,7 +1510,7 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
15221510
if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
15231511
shrink_ple_window(vcpu);
15241512

1525-
vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
1513+
vmx_vcpu_load_vmcs(vcpu, cpu);
15261514

15271515
vmx_vcpu_pi_load(vcpu, cpu);
15281516
}

arch/x86/kvm/vmx/vmx.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,8 +354,7 @@ static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
354354
return vt->exit_intr_info;
355355
}
356356

357-
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
358-
struct loaded_vmcs *buddy);
357+
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
359358
int allocate_vpid(void);
360359
void free_vpid(int vpid);
361360
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);

arch/x86/kvm/x86.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4991,6 +4991,8 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
49914991
return kvm_arch_has_noncoherent_dma(vcpu->kvm);
49924992
}
49934993

4994+
static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
4995+
49944996
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
49954997
{
49964998
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -5013,6 +5015,19 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
50135015

50145016
kvm_x86_call(vcpu_load)(vcpu, cpu);
50155017

5018+
if (vcpu != per_cpu(last_vcpu, cpu)) {
5019+
/*
5020+
* Flush the branch predictor when switching vCPUs on the same
5021+
* physical CPU, as each vCPU needs its own branch prediction
5022+
* domain. No IBPB is needed when switching between L1 and L2
5023+
* on the same vCPU unless IBRS is advertised to the vCPU; that
5024+
* is handled on the nested VM-Exit path.
5025+
*/
5026+
if (static_branch_likely(&switch_vcpu_ibpb))
5027+
indirect_branch_prediction_barrier();
5028+
per_cpu(last_vcpu, cpu) = vcpu;
5029+
}
5030+
50165031
/* Save host pkru register if supported */
50175032
vcpu->arch.host_pkru = read_pkru();
50185033

@@ -12424,13 +12439,16 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1242412439

1242512440
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1242612441
{
12427-
int idx;
12442+
int idx, cpu;
1242812443

1242912444
kvm_clear_async_pf_completion_queue(vcpu);
1243012445
kvm_mmu_unload(vcpu);
1243112446

1243212447
kvmclock_reset(vcpu);
1243312448

12449+
for_each_possible_cpu(cpu)
12450+
cmpxchg(per_cpu_ptr(&last_vcpu, cpu), vcpu, NULL);
12451+
1243412452
kvm_x86_call(vcpu_free)(vcpu);
1243512453

1243612454
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);

0 commit comments

Comments
 (0)