Skip to content

Commit 2f67381

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Bugfixes, including a TLB flush fix that affects processors without nested page tables"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: fix previous commit for 32-bit builds
  kvm: avoid speculation-based attacks from out-of-range memslot accesses
  KVM: x86: Unload MMU on guest TLB flush if TDP disabled to force MMU sync
  KVM: x86: Ensure liveliness of nested VM-Enter fail tracepoint message
  selftests: kvm: Add support for customized slot0 memory size
  KVM: selftests: introduce P47V64 for s390x
  KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior
  KVM: X86: MMU: Use the correct inherited permissions to get shadow page
  KVM: LAPIC: Write 0 to TMICT should also cancel vmx-preemption timer
  KVM: SVM: Fix SEV SEND_START session length & SEND_UPDATE_DATA query length after commit 238eca8
2 parents 368094d + 4422829 commit 2f67381

File tree

12 files changed

+105
-39
lines changed

12 files changed

+105
-39
lines changed

Documentation/virt/kvm/mmu.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@ Shadow pages contain the following information:
171171
shadow pages) so role.quadrant takes values in the range 0..3. Each
172172
quadrant maps 1GB virtual address space.
173173
role.access:
174-
Inherited guest access permissions in the form uwx. Note execute
175-
permission is positive, not negative.
174+
Inherited guest access permissions from the parent ptes in the form uwx.
175+
Note execute permission is positive, not negative.
176176
role.invalid:
177177
The page is invalid and should not be used. It is a root page that is
178178
currently pinned (by a cpu hardware register pointing to it); once it is

arch/x86/kvm/lapic.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1494,6 +1494,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
14941494

14951495
static void cancel_hv_timer(struct kvm_lapic *apic);
14961496

1497+
static void cancel_apic_timer(struct kvm_lapic *apic)
1498+
{
1499+
hrtimer_cancel(&apic->lapic_timer.timer);
1500+
preempt_disable();
1501+
if (apic->lapic_timer.hv_timer_in_use)
1502+
cancel_hv_timer(apic);
1503+
preempt_enable();
1504+
}
1505+
14971506
static void apic_update_lvtt(struct kvm_lapic *apic)
14981507
{
14991508
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1502,11 +1511,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
15021511
if (apic->lapic_timer.timer_mode != timer_mode) {
15031512
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
15041513
APIC_LVT_TIMER_TSCDEADLINE)) {
1505-
hrtimer_cancel(&apic->lapic_timer.timer);
1506-
preempt_disable();
1507-
if (apic->lapic_timer.hv_timer_in_use)
1508-
cancel_hv_timer(apic);
1509-
preempt_enable();
1514+
cancel_apic_timer(apic);
15101515
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
15111516
apic->lapic_timer.period = 0;
15121517
apic->lapic_timer.tscdeadline = 0;
@@ -2092,7 +2097,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
20922097
if (apic_lvtt_tscdeadline(apic))
20932098
break;
20942099

2095-
hrtimer_cancel(&apic->lapic_timer.timer);
2100+
cancel_apic_timer(apic);
20962101
kvm_lapic_set_reg(apic, APIC_TMICT, val);
20972102
start_apic_timer(apic);
20982103
break;

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ struct guest_walker {
9090
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
9191
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
9292
bool pte_writable[PT_MAX_FULL_LEVELS];
93-
unsigned pt_access;
94-
unsigned pte_access;
93+
unsigned int pt_access[PT_MAX_FULL_LEVELS];
94+
unsigned int pte_access;
9595
gfn_t gfn;
9696
struct x86_exception fault;
9797
};
@@ -418,13 +418,15 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
418418
}
419419

420420
walker->ptes[walker->level - 1] = pte;
421+
422+
/* Convert to ACC_*_MASK flags for struct guest_walker. */
423+
walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
421424
} while (!is_last_gpte(mmu, walker->level, pte));
422425

423426
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
424427
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
425428

426429
/* Convert to ACC_*_MASK flags for struct guest_walker. */
427-
walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
428430
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
429431
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
430432
if (unlikely(errcode))
@@ -463,7 +465,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
463465
}
464466

465467
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
466-
__func__, (u64)pte, walker->pte_access, walker->pt_access);
468+
__func__, (u64)pte, walker->pte_access,
469+
walker->pt_access[walker->level - 1]);
467470
return 1;
468471

469472
error:
@@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
643646
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
644647
struct kvm_mmu_page *sp = NULL;
645648
struct kvm_shadow_walk_iterator it;
646-
unsigned direct_access, access = gw->pt_access;
649+
unsigned int direct_access, access;
647650
int top_level, level, req_level, ret;
648651
gfn_t base_gfn = gw->gfn;
649652

@@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
675678
sp = NULL;
676679
if (!is_shadow_present_pte(*it.sptep)) {
677680
table_gfn = gw->table_gfn[it.level - 2];
681+
access = gw->pt_access[it.level - 2];
678682
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
679683
false, access);
680684
}

arch/x86/kvm/svm/sev.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,10 +1103,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
11031103
struct sev_data_send_start data;
11041104
int ret;
11051105

1106+
memset(&data, 0, sizeof(data));
11061107
data.handle = sev->handle;
11071108
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
1108-
if (ret < 0)
1109-
return ret;
11101109

11111110
params->session_len = data.session_len;
11121111
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
@@ -1215,10 +1214,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
12151214
struct sev_data_send_update_data data;
12161215
int ret;
12171216

1217+
memset(&data, 0, sizeof(data));
12181218
data.handle = sev->handle;
12191219
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
1220-
if (ret < 0)
1221-
return ret;
12221220

12231221
params->hdr_len = data.hdr_len;
12241222
params->trans_len = data.trans_len;

arch/x86/kvm/trace.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
15501550
TP_ARGS(msg, err),
15511551

15521552
TP_STRUCT__entry(
1553-
__field(const char *, msg)
1553+
__string(msg, msg)
15541554
__field(u32, err)
15551555
),
15561556

15571557
TP_fast_assign(
1558-
__entry->msg = msg;
1558+
__assign_str(msg, msg);
15591559
__entry->err = err;
15601560
),
15611561

1562-
TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
1562+
TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
15631563
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
15641564
);
15651565

arch/x86/kvm/x86.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3072,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
30723072
static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
30733073
{
30743074
++vcpu->stat.tlb_flush;
3075+
3076+
if (!tdp_enabled) {
3077+
/*
3078+
* A TLB flush on behalf of the guest is equivalent to
3079+
* INVPCID(all), toggling CR4.PGE, etc., which requires
3080+
* a forced sync of the shadow page tables. Unload the
3081+
* entire MMU here and the subsequent load will sync the
3082+
* shadow page tables, and also flush the TLB.
3083+
*/
3084+
kvm_mmu_unload(vcpu);
3085+
return;
3086+
}
3087+
30753088
static_call(kvm_x86_tlb_flush_guest)(vcpu);
30763089
}
30773090

@@ -3101,9 +3114,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
31013114
* expensive IPIs.
31023115
*/
31033116
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
3117+
u8 st_preempted = xchg(&st->preempted, 0);
3118+
31043119
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
3105-
st->preempted & KVM_VCPU_FLUSH_TLB);
3106-
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
3120+
st_preempted & KVM_VCPU_FLUSH_TLB);
3121+
if (st_preempted & KVM_VCPU_FLUSH_TLB)
31073122
kvm_vcpu_flush_tlb_guest(vcpu);
31083123
} else {
31093124
st->preempted = 0;

include/linux/kvm_host.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1185,7 +1185,15 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
11851185
static inline unsigned long
11861186
__gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
11871187
{
1188-
return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
1188+
/*
1189+
* The index was checked originally in search_memslots. To avoid
1190+
* that a malicious guest builds a Spectre gadget out of e.g. page
1191+
* table walks, do not let the processor speculate loads outside
1192+
* the guest's registered memslots.
1193+
*/
1194+
unsigned long offset = gfn - slot->base_gfn;
1195+
offset = array_index_nospec(offset, slot->npages);
1196+
return slot->userspace_addr + offset * PAGE_SIZE;
11891197
}
11901198

11911199
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)

tools/testing/selftests/kvm/include/kvm_util.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ enum vm_guest_mode {
4343
VM_MODE_P40V48_4K,
4444
VM_MODE_P40V48_64K,
4545
VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
46+
VM_MODE_P47V64_4K,
4647
NUM_VM_MODES,
4748
};
4849

@@ -60,7 +61,7 @@ enum vm_guest_mode {
6061

6162
#elif defined(__s390x__)
6263

63-
#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
64+
#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
6465
#define MIN_PAGE_SHIFT 12U
6566
#define ptes_per_page(page_size) ((page_size) / 16)
6667

@@ -285,10 +286,11 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
285286
uint32_t num_percpu_pages, void *guest_code,
286287
uint32_t vcpuids[]);
287288

288-
/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
289+
/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as parameters */
289290
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
290-
uint64_t extra_mem_pages, uint32_t num_percpu_pages,
291-
void *guest_code, uint32_t vcpuids[]);
291+
uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
292+
uint32_t num_percpu_pages, void *guest_code,
293+
uint32_t vcpuids[]);
292294

293295
/*
294296
* Adds a vCPU with reasonable defaults (e.g. a stack)

tools/testing/selftests/kvm/kvm_page_table_test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
268268

269269
/* Create a VM with enough guest pages */
270270
guest_num_pages = test_mem_size / guest_page_size;
271-
vm = vm_create_with_vcpus(mode, nr_vcpus,
271+
vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
272272
guest_num_pages, 0, guest_code, NULL);
273273

274274
/* Align down GPA of the testing memslot */

tools/testing/selftests/kvm/lib/kvm_util.c

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i)
175175
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
176176
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
177177
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
178+
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
178179
};
179180
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
180181
"Missing new mode strings?");
@@ -192,6 +193,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
192193
{ 40, 48, 0x1000, 12 },
193194
{ 40, 48, 0x10000, 16 },
194195
{ 0, 0, 0x1000, 12 },
196+
{ 47, 64, 0x1000, 12 },
195197
};
196198
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
197199
"Missing new mode params?");
@@ -277,6 +279,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
277279
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
278280
#endif
279281
break;
282+
case VM_MODE_P47V64_4K:
283+
vm->pgtable_levels = 5;
284+
break;
280285
default:
281286
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
282287
}
@@ -308,21 +313,50 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
308313
return vm;
309314
}
310315

316+
/*
317+
* VM Create with customized parameters
318+
*
319+
* Input Args:
320+
* mode - VM Mode (e.g. VM_MODE_P52V48_4K)
321+
* nr_vcpus - VCPU count
322+
* slot0_mem_pages - Slot0 physical memory size
323+
* extra_mem_pages - Non-slot0 physical memory total size
324+
* num_percpu_pages - Per-cpu physical memory pages
325+
* guest_code - Guest entry point
326+
* vcpuids - VCPU IDs
327+
*
328+
* Output Args: None
329+
*
330+
* Return:
331+
* Pointer to opaque structure that describes the created VM.
332+
*
333+
* Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
334+
* with customized slot0 memory size, at least 512 pages currently.
335+
* extra_mem_pages is only used to calculate the maximum page table size,
336+
* no real memory allocation for non-slot0 memory in this function.
337+
*/
311338
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
312-
uint64_t extra_mem_pages, uint32_t num_percpu_pages,
313-
void *guest_code, uint32_t vcpuids[])
339+
uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
340+
uint32_t num_percpu_pages, void *guest_code,
341+
uint32_t vcpuids[])
314342
{
343+
uint64_t vcpu_pages, extra_pg_pages, pages;
344+
struct kvm_vm *vm;
345+
int i;
346+
347+
/* Force slot0 memory size to be no smaller than DEFAULT_GUEST_PHY_PAGES */
348+
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
349+
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
350+
315351
/* The maximum page table size for a memory region will be when the
316352
* smallest pages are used. Considering each page contains x page
317353
* table descriptors, the total extra size for page tables (for extra
318354
* N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
319355
* than N/x*2.
320356
*/
321-
uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
322-
uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
323-
uint64_t pages = DEFAULT_GUEST_PHY_PAGES + extra_mem_pages + vcpu_pages + extra_pg_pages;
324-
struct kvm_vm *vm;
325-
int i;
357+
vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
358+
extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
359+
pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
326360

327361
TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
328362
"nr_vcpus = %d too large for host, max-vcpus = %d",
@@ -354,8 +388,8 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
354388
uint32_t num_percpu_pages, void *guest_code,
355389
uint32_t vcpuids[])
356390
{
357-
return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
358-
num_percpu_pages, guest_code, vcpuids);
391+
return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
392+
extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
359393
}
360394

361395
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,

0 commit comments

Comments
 (0)