Skip to content

Commit e669e32

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

 "ARM:

   - Fix another set of FP/SIMD/SVE bugs affecting NV, and plugging some
     missing synchronisation

   - A small fix for the irqbypass hook fixes, tightening the check and
     ensuring that we only deal with MSI for both the old and the new
     route entry

   - Rework the way the shadow LRs are addressed in a nesting
     configuration, plugging an embarrassing bug as well as simplifying
     the whole process

   - Add yet another fix for the dreaded arch_timer_edge_cases selftest

  RISC-V:

   - Fix the size parameter check in SBI SFENCE calls

   - Don't treat SBI HFENCE calls as NOPs

  x86 TDX:

   - Complete API for handling complex TDVMCALLs in userspace. This was
     delayed because the spec lacked a way for userspace to deny
     supporting these calls; the new exit code is now approved"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: TDX: Exit to userspace for GetTdVmCallInfo
  KVM: TDX: Handle TDG.VP.VMCALL<GetQuote>
  KVM: TDX: Add new TDVMCALL status code for unsupported subfuncs
  KVM: arm64: VHE: Centralize ISBs when returning to host
  KVM: arm64: Remove cpacr_clear_set()
  KVM: arm64: Remove ad-hoc CPTR manipulation from kvm_hyp_handle_fpsimd()
  KVM: arm64: Remove ad-hoc CPTR manipulation from fpsimd_sve_sync()
  KVM: arm64: Reorganise CPTR trap manipulation
  KVM: arm64: VHE: Synchronize CPTR trap deactivation
  KVM: arm64: VHE: Synchronize restore of host debug registers
  KVM: arm64: selftests: Close the GIC FD in arch_timer_edge_cases
  KVM: arm64: Explicitly treat routing entry type changes as changes
  KVM: arm64: nv: Fix tracking of shadow list registers
  RISC-V: KVM: Don't treat SBI HFENCE calls as NOPs
  RISC-V: KVM: Fix the size parameter check in SBI SFENCE calls
2 parents 75f99f8 + 25e8b1d commit e669e32

File tree

14 files changed

+376
-283
lines changed

14 files changed

+376
-283
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6645,7 +6645,8 @@ to the byte array.
66456645
.. note::
66466646

66476647
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
6648-
KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
6648+
KVM_EXIT_EPR, KVM_EXIT_HYPERCALL, KVM_EXIT_TDX,
6649+
KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
66496650
operations are complete (and guest state is consistent) only after userspace
66506651
has re-entered the kernel with KVM_RUN. The kernel side will first finish
66516652
incomplete operations and then check for pending signals.
@@ -7174,6 +7175,62 @@ The valid value for 'flags' is:
71747175
- KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid
71757176
in VMCS. It would run into unknown result if resume the target VM.
71767177

7178+
::
7179+
7180+
/* KVM_EXIT_TDX */
7181+
struct {
7182+
__u64 flags;
7183+
__u64 nr;
7184+
union {
7185+
struct {
7186+
u64 ret;
7187+
u64 data[5];
7188+
} unknown;
7189+
struct {
7190+
u64 ret;
7191+
u64 gpa;
7192+
u64 size;
7193+
} get_quote;
7194+
struct {
7195+
u64 ret;
7196+
u64 leaf;
7197+
u64 r11, r12, r13, r14;
7198+
} get_tdvmcall_info;
7199+
};
7200+
} tdx;
7201+
7202+
Process a TDVMCALL from the guest. KVM forwards select TDVMCALL based
7203+
on the Guest-Hypervisor Communication Interface (GHCI) specification;
7204+
KVM bridges these requests to the userspace VMM with minimal changes,
7205+
placing the inputs in the union and copying them back to the guest
7206+
on re-entry.
7207+
7208+
Flags are currently always zero, whereas ``nr`` contains the TDVMCALL
7209+
number from register R11. The remaining fields of the union provide the
7210+
inputs and outputs of the TDVMCALL. Currently the following values of
7211+
``nr`` are defined:
7212+
7213+
* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
7214+
signed by a service hosting TD-Quoting Enclave operating on the host.
7215+
Parameters and return value are in the ``get_quote`` field of the union.
7216+
The ``gpa`` field and ``size`` specify the guest physical address
7217+
(without the shared bit set) and the size of a shared-memory buffer, in
7218+
which the TDX guest passes a TD Report. The ``ret`` field represents
7219+
the return value of the GetQuote request. When the request has been
7220+
queued successfully, the TDX guest can poll the status field in the
7221+
shared-memory area to check whether the Quote generation is completed or
7222+
not. When completed, the generated Quote is returned via the same buffer.
7223+
7224+
* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
7225+
status of TDVMCALLs. The output values for the given leaf should be
7226+
placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
7227+
field of the union.
7228+
7229+
KVM may add support for more values in the future that may cause a userspace
7230+
exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
7231+
it will enter with output fields already valid; in the common case, the
7232+
``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
7233+
Userspace need not do anything if it does not wish to support a TDVMCALL.
71777234
::
71787235

71797236
/* Fix the size of the union. */

arch/arm64/include/asm/kvm_emulate.h

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
561561
vcpu_set_flag((v), e); \
562562
} while (0)
563563

564-
#define __build_check_all_or_none(r, bits) \
565-
BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
566-
567-
#define __cpacr_to_cptr_clr(clr, set) \
568-
({ \
569-
u64 cptr = 0; \
570-
\
571-
if ((set) & CPACR_EL1_FPEN) \
572-
cptr |= CPTR_EL2_TFP; \
573-
if ((set) & CPACR_EL1_ZEN) \
574-
cptr |= CPTR_EL2_TZ; \
575-
if ((set) & CPACR_EL1_SMEN) \
576-
cptr |= CPTR_EL2_TSM; \
577-
if ((clr) & CPACR_EL1_TTA) \
578-
cptr |= CPTR_EL2_TTA; \
579-
if ((clr) & CPTR_EL2_TAM) \
580-
cptr |= CPTR_EL2_TAM; \
581-
if ((clr) & CPTR_EL2_TCPAC) \
582-
cptr |= CPTR_EL2_TCPAC; \
583-
\
584-
cptr; \
585-
})
586-
587-
#define __cpacr_to_cptr_set(clr, set) \
588-
({ \
589-
u64 cptr = 0; \
590-
\
591-
if ((clr) & CPACR_EL1_FPEN) \
592-
cptr |= CPTR_EL2_TFP; \
593-
if ((clr) & CPACR_EL1_ZEN) \
594-
cptr |= CPTR_EL2_TZ; \
595-
if ((clr) & CPACR_EL1_SMEN) \
596-
cptr |= CPTR_EL2_TSM; \
597-
if ((set) & CPACR_EL1_TTA) \
598-
cptr |= CPTR_EL2_TTA; \
599-
if ((set) & CPTR_EL2_TAM) \
600-
cptr |= CPTR_EL2_TAM; \
601-
if ((set) & CPTR_EL2_TCPAC) \
602-
cptr |= CPTR_EL2_TCPAC; \
603-
\
604-
cptr; \
605-
})
606-
607-
#define cpacr_clear_set(clr, set) \
608-
do { \
609-
BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
610-
BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \
611-
__build_check_all_or_none((clr), CPACR_EL1_FPEN); \
612-
__build_check_all_or_none((set), CPACR_EL1_FPEN); \
613-
__build_check_all_or_none((clr), CPACR_EL1_ZEN); \
614-
__build_check_all_or_none((set), CPACR_EL1_ZEN); \
615-
__build_check_all_or_none((clr), CPACR_EL1_SMEN); \
616-
__build_check_all_or_none((set), CPACR_EL1_SMEN); \
617-
\
618-
if (has_vhe() || has_hvhe()) \
619-
sysreg_clear_set(cpacr_el1, clr, set); \
620-
else \
621-
sysreg_clear_set(cptr_el2, \
622-
__cpacr_to_cptr_clr(clr, set), \
623-
__cpacr_to_cptr_set(clr, set));\
624-
} while (0)
625-
626564
/*
627565
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
628566
* format if E2H isn't set.

arch/arm64/include/asm/kvm_host.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm);
12891289
})
12901290

12911291
/*
1292-
* The couple of isb() below are there to guarantee the same behaviour
1293-
* on VHE as on !VHE, where the eret to EL1 acts as a context
1294-
* synchronization event.
1292+
* The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
1293+
* where the eret to EL1 acts as a context synchronization event.
12951294
*/
12961295
#define kvm_call_hyp(f, ...) \
12971296
do { \
@@ -1309,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
13091308
\
13101309
if (has_vhe()) { \
13111310
ret = f(__VA_ARGS__); \
1312-
isb(); \
13131311
} else { \
13141312
ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
13151313
} \

arch/arm64/kvm/arm.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2764,7 +2764,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
27642764
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
27652765
struct kvm_kernel_irq_routing_entry *new)
27662766
{
2767-
if (new->type != KVM_IRQ_ROUTING_MSI)
2767+
if (old->type != KVM_IRQ_ROUTING_MSI ||
2768+
new->type != KVM_IRQ_ROUTING_MSI)
27682769
return true;
27692770

27702771
return memcmp(&old->msi, &new->msi, sizeof(new->msi));

arch/arm64/kvm/hyp/include/hyp/switch.h

Lines changed: 138 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
6565
}
6666
}
6767

68+
static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
69+
{
70+
u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA;
71+
72+
/*
73+
* Always trap SME since it's not supported in KVM.
74+
* TSM is RES1 if SME isn't implemented.
75+
*/
76+
val |= CPTR_EL2_TSM;
77+
78+
if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
79+
val |= CPTR_EL2_TZ;
80+
81+
if (!guest_owns_fp_regs())
82+
val |= CPTR_EL2_TFP;
83+
84+
write_sysreg(val, cptr_el2);
85+
}
86+
87+
static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
88+
{
89+
/*
90+
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
91+
* CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
92+
* except for some missing controls, such as TAM.
93+
* In this case, CPTR_EL2.TAM has the same position with or without
94+
* VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
95+
* shift value for trapping the AMU accesses.
96+
*/
97+
u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA;
98+
u64 cptr;
99+
100+
if (guest_owns_fp_regs()) {
101+
val |= CPACR_EL1_FPEN;
102+
if (vcpu_has_sve(vcpu))
103+
val |= CPACR_EL1_ZEN;
104+
}
105+
106+
if (!vcpu_has_nv(vcpu))
107+
goto write;
108+
109+
/*
110+
* The architecture is a bit crap (what a surprise): an EL2 guest
111+
* writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
112+
* as they are RES0 in the guest's view. To work around it, trap the
113+
* sucker using the very same bit it can't set...
114+
*/
115+
if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
116+
val |= CPTR_EL2_TCPAC;
117+
118+
/*
119+
* Layer the guest hypervisor's trap configuration on top of our own if
120+
* we're in a nested context.
121+
*/
122+
if (is_hyp_ctxt(vcpu))
123+
goto write;
124+
125+
cptr = vcpu_sanitised_cptr_el2(vcpu);
126+
127+
/*
128+
* Pay attention, there's some interesting detail here.
129+
*
130+
* The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
131+
* meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
132+
*
133+
* - CPTR_EL2.xEN = x0, traps are enabled
134+
* - CPTR_EL2.xEN = x1, traps are disabled
135+
*
136+
* In other words, bit[0] determines if guest accesses trap or not. In
137+
* the interest of simplicity, clear the entire field if the guest
138+
* hypervisor has traps enabled to dispel any illusion of something more
139+
* complicated taking place.
140+
*/
141+
if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
142+
val &= ~CPACR_EL1_FPEN;
143+
if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
144+
val &= ~CPACR_EL1_ZEN;
145+
146+
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
147+
val |= cptr & CPACR_EL1_E0POE;
148+
149+
val |= cptr & CPTR_EL2_TCPAC;
150+
151+
write:
152+
write_sysreg(val, cpacr_el1);
153+
}
154+
155+
static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu)
156+
{
157+
if (!guest_owns_fp_regs())
158+
__activate_traps_fpsimd32(vcpu);
159+
160+
if (has_vhe() || has_hvhe())
161+
__activate_cptr_traps_vhe(vcpu);
162+
else
163+
__activate_cptr_traps_nvhe(vcpu);
164+
}
165+
166+
static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
167+
{
168+
u64 val = CPTR_NVHE_EL2_RES1;
169+
170+
if (!cpus_have_final_cap(ARM64_SVE))
171+
val |= CPTR_EL2_TZ;
172+
if (!cpus_have_final_cap(ARM64_SME))
173+
val |= CPTR_EL2_TSM;
174+
175+
write_sysreg(val, cptr_el2);
176+
}
177+
178+
static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
179+
{
180+
u64 val = CPACR_EL1_FPEN;
181+
182+
if (cpus_have_final_cap(ARM64_SVE))
183+
val |= CPACR_EL1_ZEN;
184+
if (cpus_have_final_cap(ARM64_SME))
185+
val |= CPACR_EL1_SMEN;
186+
187+
write_sysreg(val, cpacr_el1);
188+
}
189+
190+
static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
191+
{
192+
if (has_vhe() || has_hvhe())
193+
__deactivate_cptr_traps_vhe(vcpu);
194+
else
195+
__deactivate_cptr_traps_nvhe(vcpu);
196+
}
197+
68198
#define reg_to_fgt_masks(reg) \
69199
({ \
70200
struct fgt_masks *m; \
@@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
486616
*/
487617
if (system_supports_sve()) {
488618
__hyp_sve_save_host();
489-
490-
/* Re-enable SVE traps if not supported for the guest vcpu. */
491-
if (!vcpu_has_sve(vcpu))
492-
cpacr_clear_set(CPACR_EL1_ZEN, 0);
493-
494619
} else {
495620
__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
496621
}
@@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
541666
/* Valid trap. Switch the context: */
542667

543668
/* First disable enough traps to allow us to update the registers */
544-
if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
545-
cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
546-
else
547-
cpacr_clear_set(0, CPACR_EL1_FPEN);
669+
__deactivate_cptr_traps(vcpu);
548670
isb();
549671

550672
/* Write out the host state if it's in the registers */
@@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
566688

567689
*host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
568690

691+
/*
692+
* Re-enable traps necessary for the current state of the guest, e.g.
693+
* those enabled by a guest hypervisor. The ERET to the guest will
694+
* provide the necessary context synchronization.
695+
*/
696+
__activate_cptr_traps(vcpu);
697+
569698
return true;
570699
}
571700

arch/arm64/kvm/hyp/nvhe/hyp-main.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
6969
if (!guest_owns_fp_regs())
7070
return;
7171

72-
cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
72+
/*
73+
* Traps have been disabled by __deactivate_cptr_traps(), but there
74+
* hasn't necessarily been a context synchronization event yet.
75+
*/
7376
isb();
7477

7578
if (vcpu_has_sve(vcpu))

0 commit comments

Comments
 (0)