Skip to content

Commit ec0ab05

Browse files
Marc Zyngier authored and Oliver Upton (oupton) committed
KVM: arm64: Fix vcpu_{read,write}_sys_reg() accessors
Volodymyr reports (again!) that under some circumstances (E2H==0, walking S1 PTs), PAR_EL1 doesn't report the value of the latest walk in the CPU register, but that instead the value is written to the backing store. Further investigation indicates that the root cause of this is that a group of registers (PAR_EL1, TPIDR*_EL{0,1}, the *32_EL2 dregs) should always be considered as "on CPU", as they are not remapped between EL1 and EL2. We fail to treat them accordingly, and end up considering that the register (PAR_EL1 in this example) should be written to memory instead of in the register. While it would be possible to quickly work around it, it is obvious that the way we track these things at the moment is pretty horrible, and could do with some improvement. Revamp the whole thing by: - defining a location for a register (memory, cpu), potentially depending on the state of the vcpu - define a transformation for this register (mapped register, potential translation, special register needing some particular attention) - convey this information in a structure that can be easily passed around As a result, the accessors themselves become much simpler, as the state is explicit instead of being driven by hard-to-understand conventions. We get rid of the "pure EL2 register" notion, which wasn't very useful, and add sanitisation of the values by applying the RESx masks as required, something that was missing so far. And of course, we add the missing registers to the list, with the indication that they are always loaded. Reported-by: Volodymyr Babchuk <[email protected]> Signed-off-by: Marc Zyngier <[email protected]> Fixes: fedc612 ("KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg()") Link: https://lore.kernel.org/r/[email protected] Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Oliver Upton <[email protected]>
1 parent e3f6836 commit ec0ab05

File tree

2 files changed

+162
-112
lines changed

2 files changed

+162
-112
lines changed

arch/arm64/include/asm/kvm_host.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,8 +1158,8 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
11581158
__v; \
11591159
})
11601160

1161-
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
1162-
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
1161+
u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
1162+
void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg);
11631163

11641164
static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
11651165
{

arch/arm64/kvm/sys_regs.c

Lines changed: 160 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -82,43 +82,105 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
8282
"sys_reg write to read-only register");
8383
}
8484

85-
#define PURE_EL2_SYSREG(el2) \
86-
case el2: { \
87-
*el1r = el2; \
88-
return true; \
85+
enum sr_loc_attr {
86+
SR_LOC_MEMORY = 0, /* Register definitely in memory */
87+
SR_LOC_LOADED = BIT(0), /* Register on CPU, unless it cannot */
88+
SR_LOC_MAPPED = BIT(1), /* Register in a different CPU register */
89+
SR_LOC_XLATED = BIT(2), /* Register translated to fit another reg */
90+
SR_LOC_SPECIAL = BIT(3), /* Demanding register, implies loaded */
91+
};
92+
93+
struct sr_loc {
94+
enum sr_loc_attr loc;
95+
enum vcpu_sysreg map_reg;
96+
u64 (*xlate)(u64);
97+
};
98+
99+
static enum sr_loc_attr locate_direct_register(const struct kvm_vcpu *vcpu,
100+
enum vcpu_sysreg reg)
101+
{
102+
switch (reg) {
103+
case SCTLR_EL1:
104+
case CPACR_EL1:
105+
case TTBR0_EL1:
106+
case TTBR1_EL1:
107+
case TCR_EL1:
108+
case TCR2_EL1:
109+
case PIR_EL1:
110+
case PIRE0_EL1:
111+
case POR_EL1:
112+
case ESR_EL1:
113+
case AFSR0_EL1:
114+
case AFSR1_EL1:
115+
case FAR_EL1:
116+
case MAIR_EL1:
117+
case VBAR_EL1:
118+
case CONTEXTIDR_EL1:
119+
case AMAIR_EL1:
120+
case CNTKCTL_EL1:
121+
case ELR_EL1:
122+
case SPSR_EL1:
123+
case ZCR_EL1:
124+
case SCTLR2_EL1:
125+
/*
126+
* EL1 registers which have an ELx2 mapping are loaded if
127+
* we're not in hypervisor context.
128+
*/
129+
return is_hyp_ctxt(vcpu) ? SR_LOC_MEMORY : SR_LOC_LOADED;
130+
131+
case TPIDR_EL0:
132+
case TPIDRRO_EL0:
133+
case TPIDR_EL1:
134+
case PAR_EL1:
135+
case DACR32_EL2:
136+
case IFSR32_EL2:
137+
case DBGVCR32_EL2:
138+
/* These registers are always loaded, no matter what */
139+
return SR_LOC_LOADED;
140+
141+
default:
142+
/* Non-mapped EL2 registers are by definition in memory. */
143+
return SR_LOC_MEMORY;
89144
}
145+
}
90146

91-
#define MAPPED_EL2_SYSREG(el2, el1, fn) \
92-
case el2: { \
93-
*xlate = fn; \
94-
*el1r = el1; \
95-
return true; \
147+
static void locate_mapped_el2_register(const struct kvm_vcpu *vcpu,
148+
enum vcpu_sysreg reg,
149+
enum vcpu_sysreg map_reg,
150+
u64 (*xlate)(u64),
151+
struct sr_loc *loc)
152+
{
153+
if (!is_hyp_ctxt(vcpu)) {
154+
loc->loc = SR_LOC_MEMORY;
155+
return;
96156
}
97157

98-
static bool get_el2_to_el1_mapping(unsigned int reg,
99-
unsigned int *el1r, u64 (**xlate)(u64))
158+
loc->loc = SR_LOC_LOADED | SR_LOC_MAPPED;
159+
loc->map_reg = map_reg;
160+
161+
WARN_ON(locate_direct_register(vcpu, map_reg) != SR_LOC_MEMORY);
162+
163+
if (xlate != NULL && !vcpu_el2_e2h_is_set(vcpu)) {
164+
loc->loc |= SR_LOC_XLATED;
165+
loc->xlate = xlate;
166+
}
167+
}
168+
169+
#define MAPPED_EL2_SYSREG(r, m, t) \
170+
case r: { \
171+
locate_mapped_el2_register(vcpu, r, m, t, loc); \
172+
break; \
173+
}
174+
175+
static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg,
176+
struct sr_loc *loc)
100177
{
178+
if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) {
179+
loc->loc = SR_LOC_MEMORY;
180+
return;
181+
}
182+
101183
switch (reg) {
102-
PURE_EL2_SYSREG( VPIDR_EL2 );
103-
PURE_EL2_SYSREG( VMPIDR_EL2 );
104-
PURE_EL2_SYSREG( ACTLR_EL2 );
105-
PURE_EL2_SYSREG( HCR_EL2 );
106-
PURE_EL2_SYSREG( MDCR_EL2 );
107-
PURE_EL2_SYSREG( HSTR_EL2 );
108-
PURE_EL2_SYSREG( HACR_EL2 );
109-
PURE_EL2_SYSREG( VTTBR_EL2 );
110-
PURE_EL2_SYSREG( VTCR_EL2 );
111-
PURE_EL2_SYSREG( TPIDR_EL2 );
112-
PURE_EL2_SYSREG( HPFAR_EL2 );
113-
PURE_EL2_SYSREG( HCRX_EL2 );
114-
PURE_EL2_SYSREG( HFGRTR_EL2 );
115-
PURE_EL2_SYSREG( HFGWTR_EL2 );
116-
PURE_EL2_SYSREG( HFGITR_EL2 );
117-
PURE_EL2_SYSREG( HDFGRTR_EL2 );
118-
PURE_EL2_SYSREG( HDFGWTR_EL2 );
119-
PURE_EL2_SYSREG( HAFGRTR_EL2 );
120-
PURE_EL2_SYSREG( CNTVOFF_EL2 );
121-
PURE_EL2_SYSREG( CNTHCTL_EL2 );
122184
MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
123185
translate_sctlr_el2_to_sctlr_el1 );
124186
MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1,
@@ -144,125 +206,113 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
144206
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
145207
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
146208
MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
209+
case CNTHCTL_EL2:
210+
/* CNTHCTL_EL2 is super special, until we support NV2.1 */
211+
loc->loc = ((is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) ?
212+
SR_LOC_SPECIAL : SR_LOC_MEMORY);
213+
break;
147214
default:
148-
return false;
215+
loc->loc = locate_direct_register(vcpu, reg);
149216
}
150217
}
151218

152-
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
219+
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg)
153220
{
154-
u64 val = 0x8badf00d8badf00d;
155-
u64 (*xlate)(u64) = NULL;
156-
unsigned int el1r;
221+
struct sr_loc loc = {};
222+
223+
locate_register(vcpu, reg, &loc);
224+
225+
WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY);
157226

158-
if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
159-
goto memory_read;
227+
if (loc.loc & SR_LOC_SPECIAL) {
228+
u64 val;
160229

161-
if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
162-
if (!is_hyp_ctxt(vcpu))
163-
goto memory_read;
230+
WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL);
164231

165232
/*
166-
* CNTHCTL_EL2 requires some special treatment to
167-
* account for the bits that can be set via CNTKCTL_EL1.
233+
* CNTHCTL_EL2 requires some special treatment to account
234+
* for the bits that can be set via CNTKCTL_EL1 when E2H==1.
168235
*/
169236
switch (reg) {
170237
case CNTHCTL_EL2:
171-
if (vcpu_el2_e2h_is_set(vcpu)) {
172-
val = read_sysreg_el1(SYS_CNTKCTL);
173-
val &= CNTKCTL_VALID_BITS;
174-
val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
175-
return val;
176-
}
177-
break;
238+
val = read_sysreg_el1(SYS_CNTKCTL);
239+
val &= CNTKCTL_VALID_BITS;
240+
val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
241+
return val;
242+
default:
243+
WARN_ON_ONCE(1);
178244
}
245+
}
179246

180-
/*
181-
* If this register does not have an EL1 counterpart,
182-
* then read the stored EL2 version.
183-
*/
184-
if (reg == el1r)
185-
goto memory_read;
247+
if (loc.loc & SR_LOC_LOADED) {
248+
enum vcpu_sysreg map_reg = reg;
249+
u64 val = 0x8badf00d8badf00d;
186250

187-
/*
188-
* If we have a non-VHE guest and that the sysreg
189-
* requires translation to be used at EL1, use the
190-
* in-memory copy instead.
191-
*/
192-
if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
193-
goto memory_read;
251+
if (loc.loc & SR_LOC_MAPPED)
252+
map_reg = loc.map_reg;
194253

195-
/* Get the current version of the EL1 counterpart. */
196-
WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
197-
if (reg >= __SANITISED_REG_START__)
198-
val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
254+
if (!(loc.loc & SR_LOC_XLATED) &&
255+
__vcpu_read_sys_reg_from_cpu(map_reg, &val)) {
256+
if (reg >= __SANITISED_REG_START__)
257+
val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
199258

200-
return val;
259+
return val;
260+
}
201261
}
202262

203-
/* EL1 register can't be on the CPU if the guest is in vEL2. */
204-
if (unlikely(is_hyp_ctxt(vcpu)))
205-
goto memory_read;
206-
207-
if (__vcpu_read_sys_reg_from_cpu(reg, &val))
208-
return val;
209-
210-
memory_read:
211263
return __vcpu_sys_reg(vcpu, reg);
212264
}
213265

214-
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
266+
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg)
215267
{
216-
u64 (*xlate)(u64) = NULL;
217-
unsigned int el1r;
268+
struct sr_loc loc = {};
218269

219-
if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
220-
goto memory_write;
270+
locate_register(vcpu, reg, &loc);
221271

222-
if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
223-
if (!is_hyp_ctxt(vcpu))
224-
goto memory_write;
272+
WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY);
225273

226-
/*
227-
* Always store a copy of the write to memory to avoid having
228-
* to reverse-translate virtual EL2 system registers for a
229-
* non-VHE guest hypervisor.
230-
*/
231-
__vcpu_assign_sys_reg(vcpu, reg, val);
274+
if (loc.loc & SR_LOC_SPECIAL) {
275+
276+
WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL);
232277

233278
switch (reg) {
234279
case CNTHCTL_EL2:
235280
/*
236-
* If E2H=0, CNHTCTL_EL2 is a pure shadow register.
237-
* Otherwise, some of the bits are backed by
281+
* If E2H=1, some of the bits are backed by
238282
* CNTKCTL_EL1, while the rest is kept in memory.
239283
* Yes, this is fun stuff.
240284
*/
241-
if (vcpu_el2_e2h_is_set(vcpu))
242-
write_sysreg_el1(val, SYS_CNTKCTL);
243-
return;
285+
write_sysreg_el1(val, SYS_CNTKCTL);
286+
break;
287+
default:
288+
WARN_ON_ONCE(1);
244289
}
290+
}
245291

246-
/* No EL1 counterpart? We're done here.? */
247-
if (reg == el1r)
248-
return;
292+
if (loc.loc & SR_LOC_LOADED) {
293+
enum vcpu_sysreg map_reg = reg;
294+
u64 xlated_val;
249295

250-
if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
251-
val = xlate(val);
296+
if (reg >= __SANITISED_REG_START__)
297+
val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
252298

253-
/* Redirect this to the EL1 version of the register. */
254-
WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r));
255-
return;
256-
}
299+
if (loc.loc & SR_LOC_MAPPED)
300+
map_reg = loc.map_reg;
257301

258-
/* EL1 register can't be on the CPU if the guest is in vEL2. */
259-
if (unlikely(is_hyp_ctxt(vcpu)))
260-
goto memory_write;
302+
if (loc.loc & SR_LOC_XLATED)
303+
xlated_val = loc.xlate(val);
304+
else
305+
xlated_val = val;
261306

262-
if (__vcpu_write_sys_reg_to_cpu(val, reg))
263-
return;
307+
__vcpu_write_sys_reg_to_cpu(xlated_val, map_reg);
308+
309+
/*
310+
* Fall through to write the backing store anyway, which
311+
* allows translated registers to be directly read without a
312+
* reverse translation.
313+
*/
314+
}
264315

265-
memory_write:
266316
__vcpu_assign_sys_reg(vcpu, reg, val);
267317
}
268318

0 commit comments

Comments (0)