Skip to content

Commit b9ecb9a

Browse files
committed
Merge branch 'kvm-guest-sev-migration' into kvm-master
Add guest API and guest kernel support for SEV live migration. Introduce a new hypercall to notify the host of changes to the page encryption status. If the page is encrypted, it must be migrated through the SEV firmware or a helper VM sharing the key. If the page is not encrypted, it can be migrated normally by userspace. This new hypercall is invoked using paravirt_ops. Conflicts: sev_active() replaced by cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT).
2 parents debe436 + 73f1b4f commit b9ecb9a

File tree

10 files changed

+202
-9
lines changed

10 files changed

+202
-9
lines changed

arch/x86/include/asm/kvm_para.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
8383
return ret;
8484
}
8585

86+
static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
87+
unsigned long p2, unsigned long p3)
88+
{
89+
long ret;
90+
91+
asm volatile("vmmcall"
92+
: "=a"(ret)
93+
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
94+
: "memory");
95+
return ret;
96+
}
97+
8698
#ifdef CONFIG_KVM_GUEST
8799
void kvmclock_init(void);
88100
void kvmclock_disable(void);

arch/x86/include/asm/mem_encrypt.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
4444

4545
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
4646
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
47+
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
48+
bool enc);
4749

4850
void __init mem_encrypt_free_decrypted_mem(void);
4951

@@ -78,6 +80,8 @@ static inline int __init
7880
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
7981
static inline int __init
8082
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
83+
static inline void __init
84+
early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
8185

8286
static inline void mem_encrypt_free_decrypted_mem(void) { }
8387

arch/x86/include/asm/paravirt.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
9797
PVOP_VCALL1(mmu.exit_mmap, mm);
9898
}
9999

100+
/*
 * Report a page-encryption status change for @npages pages starting at page
 * frame @pfn by dispatching to the mmu.notify_page_enc_status_changed
 * paravirt operation.  @enc carries the new encryption state.
 */
static inline void notify_page_enc_status_changed(unsigned long pfn,
						  int npages, bool enc)
{
	PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
}
105+
100106
#ifdef CONFIG_PARAVIRT_XXL
101107
static inline void load_sp0(unsigned long sp0)
102108
{

arch/x86/include/asm/paravirt_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ struct pv_mmu_ops {
168168

169169
/* Hook for intercepting the destruction of an mm_struct. */
170170
void (*exit_mmap)(struct mm_struct *mm);
171+
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
171172

172173
#ifdef CONFIG_PARAVIRT_XXL
173174
struct paravirt_callee_save read_cr2;

arch/x86/include/asm/set_memory.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
8383
int set_direct_map_invalid_noflush(struct page *page);
8484
int set_direct_map_default_noflush(struct page *page);
8585
bool kernel_page_present(struct page *page);
86+
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
8687

8788
extern int kernel_set_to_readonly;
8889

arch/x86/kernel/kvm.c

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <linux/swait.h>
2929
#include <linux/syscore_ops.h>
3030
#include <linux/cc_platform.h>
31+
#include <linux/efi.h>
3132
#include <asm/timer.h>
3233
#include <asm/cpu.h>
3334
#include <asm/traps.h>
@@ -41,6 +42,7 @@
4142
#include <asm/ptrace.h>
4243
#include <asm/reboot.h>
4344
#include <asm/svm.h>
45+
#include <asm/e820/api.h>
4446

4547
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
4648

@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
434436
kvm_disable_steal_time();
435437
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
436438
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
439+
if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
440+
wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
437441
kvm_pv_disable_apf();
438442
if (!shutdown)
439443
apf_task_wake_all();
@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
548552
__send_ipi_mask(local_mask, vector);
549553
}
550554

555+
/*
 * Late-init hook for memory-encrypted KVM guests that booted through EFI.
 *
 * Reads the "SevLiveMigrationEnabled" EFI variable (AMD_SEV_MEM_ENCRYPT_GUID)
 * and, when it holds a non-zero value, writes KVM_MIGRATION_READY to
 * MSR_KVM_MIGRATION_CONTROL.  Guests that did not boot via EFI are not handled
 * here; kvm_init_platform() writes the MSR for them.
 *
 * Returns 1 if the MSR was written, 0 in every other case.
 */
static int __init setup_efi_kvm_sev_migration(void)
{
	efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
	efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
	efi_status_t status;
	unsigned long size;
	/*
	 * The firmware stores a raw byte into this buffer.  Use a plain byte
	 * instead of a bool so that any value other than 0/1 is still well
	 * defined, and initialise it so that a zero-length variable reads as
	 * "disabled" rather than as uninitialised stack contents.
	 */
	unsigned char enabled = 0;

	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
	    !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
		return 0;

	if (!efi_enabled(EFI_BOOT))
		return 0;

	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
		pr_info("%s : EFI runtime services are not enabled\n", __func__);
		return 0;
	}

	size = sizeof(enabled);

	/* Get variable contents into buffer */
	status = efi.get_variable(efi_sev_live_migration_enabled,
				  &efi_variable_guid, NULL, &size, &enabled);

	if (status == EFI_NOT_FOUND) {
		pr_info("%s : EFI live migration variable not found\n", __func__);
		return 0;
	}

	if (status != EFI_SUCCESS) {
		pr_info("%s : EFI variable retrieval failed\n", __func__);
		return 0;
	}

	if (!enabled) {
		pr_info("%s: live migration disabled in EFI\n", __func__);
		return 0;
	}

	pr_info("%s : live migration enabled in EFI\n", __func__);
	wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);

	return 1;
}
601+
602+
late_initcall(setup_efi_kvm_sev_migration);
603+
551604
/*
552605
* Set the IPI entry points
553606
*/
@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
806859
return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
807860
}
808861

862+
/*
 * Tell the host, through the KVM_HC_MAP_GPA_RANGE hypercall, that @npages
 * pages starting at page frame @pfn changed encryption status.  The range is
 * always described with the 4K page-size flag; @enc selects the encrypted or
 * decrypted attribute.
 */
static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
{
	unsigned long gpa = pfn << PAGE_SHIFT;
	unsigned long attrs = KVM_MAP_GPA_RANGE_ENC_STAT(enc) |
			      KVM_MAP_GPA_RANGE_PAGE_SZ_4K;

	kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, gpa, npages, attrs);
}
867+
809868
/*
 * Early platform setup for a KVM guest.
 *
 * For memory-encrypted guests on hosts that advertise
 * KVM_FEATURE_MIGRATION_CONTROL this installs the page-encryption notifier,
 * re-seeds the host's record of page encryption state and, for non-EFI boots,
 * signals migration readiness.  kvmclock and the APIC post-init hook are set
 * up unconditionally.
 */
static void __init kvm_init_platform(void)
{
	bool sev_migration = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
			     kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL);

	if (sev_migration) {
		unsigned long ram_pages, bss_pages;
		int i;

		pv_ops.mmu.notify_page_enc_status_changed =
			kvm_sev_hc_page_enc_status;

		/*
		 * A kernel loaded by kexec must not inherit the previous
		 * kernel's page encryption status settings from the host's
		 * shared pages list.  Do the reset here, during early boot,
		 * instead of just before kexec, so that it cannot race with
		 * other CPUs in kvm_pv_guest_cpu_reboot().
		 *
		 * Only RAM ranges are reset: the complete shared pages list
		 * cannot be cleared because the UEFI/OVMF firmware specific
		 * settings have to be retained.
		 */
		for (i = 0; i < e820_table->nr_entries; i++) {
			struct e820_entry *entry = &e820_table->entries[i];

			if (entry->type == E820_TYPE_RAM) {
				ram_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);

				kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE,
						   entry->addr, ram_pages,
						   KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
			}
		}

		/*
		 * The _bss_decrypted section must show up as decrypted in the
		 * shared pages list.
		 */
		bss_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
					 PAGE_SIZE);
		early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
						bss_pages, 0);

		/*
		 * Without an EFI boot there is no EFI variable to consult, so
		 * enable live migration support right away.
		 */
		if (!efi_enabled(EFI_BOOT))
			wrmsrl(MSR_KVM_MIGRATION_CONTROL,
			       KVM_MIGRATION_READY);
	}

	kvmclock_init();
	x86_platform.apic_post_init = kvm_apic_init;
}

arch/x86/kernel/paravirt.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
337337
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
338338

339339
.mmu.exit_mmap = paravirt_nop,
340+
.mmu.notify_page_enc_status_changed = paravirt_nop,
340341

341342
#ifdef CONFIG_PARAVIRT_XXL
342343
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),

arch/x86/mm/mem_encrypt.c

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
229229
swiotlb_adjust_size(size);
230230
}
231231

232-
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
232+
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
233233
{
234-
pgprot_t old_prot, new_prot;
235-
unsigned long pfn, pa, size;
236-
pte_t new_pte;
234+
unsigned long pfn = 0;
235+
pgprot_t prot;
237236

238237
switch (level) {
239238
case PG_LEVEL_4K:
240239
pfn = pte_pfn(*kpte);
241-
old_prot = pte_pgprot(*kpte);
240+
prot = pte_pgprot(*kpte);
242241
break;
243242
case PG_LEVEL_2M:
244243
pfn = pmd_pfn(*(pmd_t *)kpte);
245-
old_prot = pmd_pgprot(*(pmd_t *)kpte);
244+
prot = pmd_pgprot(*(pmd_t *)kpte);
246245
break;
247246
case PG_LEVEL_1G:
248247
pfn = pud_pfn(*(pud_t *)kpte);
249-
old_prot = pud_pgprot(*(pud_t *)kpte);
248+
prot = pud_pgprot(*(pud_t *)kpte);
250249
break;
251250
default:
252-
return;
251+
WARN_ONCE(1, "Invalid level for kpte\n");
252+
return 0;
253+
}
254+
255+
if (ret_prot)
256+
*ret_prot = prot;
257+
258+
return pfn;
259+
}
260+
261+
/*
 * Report the encryption status of a kernel virtual address range to the
 * hypervisor.
 *
 * Walks the kernel page tables covering @npages pages starting at @vaddr and
 * calls notify_page_enc_status_changed() once per mapping found, passing the
 * mapping's page frame number, its size in pages and @enc.  The walk stops
 * with a one-time warning if an address has no valid mapping.
 *
 * Compiles to a no-op when CONFIG_PARAVIRT is not set.
 */
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
{
#ifdef CONFIG_PARAVIRT
	/*
	 * Widen before shifting: done in int, the shift overflows once the
	 * range reaches 2 GiB (with 4K pages).
	 */
	unsigned long sz = (unsigned long)npages << PAGE_SHIFT;
	unsigned long vaddr_end = vaddr + sz;

	while (vaddr < vaddr_end) {
		/* Full-width size and mask so large page levels do not truncate. */
		unsigned long psize, pmask, pfn;
		int level;
		pte_t *kpte;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/* pg_level_to_pfn() returns 0 when it cannot produce a pfn. */
		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (pfn)
			notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

		/*
		 * Always step past this mapping, including when it was
		 * skipped above; otherwise the loop would never terminate.
		 */
		vaddr = (vaddr & pmask) + psize;
	}
#endif
}
291+
292+
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
293+
{
294+
pgprot_t old_prot, new_prot;
295+
unsigned long pfn, pa, size;
296+
pte_t new_pte;
297+
298+
pfn = pg_level_to_pfn(level, kpte, &old_prot);
299+
if (!pfn)
300+
return;
254301

255302
new_prot = old_prot;
256303
if (enc)
@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
286333
static int __init early_set_memory_enc_dec(unsigned long vaddr,
287334
unsigned long size, bool enc)
288335
{
289-
unsigned long vaddr_end, vaddr_next;
336+
unsigned long vaddr_end, vaddr_next, start;
290337
unsigned long psize, pmask;
291338
int split_page_size_mask;
292339
int level, ret;
293340
pte_t *kpte;
294341

342+
start = vaddr;
295343
vaddr_next = vaddr;
296344
vaddr_end = vaddr + size;
297345

@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
346394

347395
ret = 0;
348396

397+
notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
349398
out:
350399
__flush_tlb_all();
351400
return ret;
@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
361410
return early_set_memory_enc_dec(vaddr, size, true);
362411
}
363412

413+
/*
 * Init-time entry point for reporting an encryption status change: hands
 * @vaddr, @npages and @enc straight to notify_range_enc_status_changed()
 * without touching any page table entries itself.
 */
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
					    bool enc)
{
	notify_range_enc_status_changed(vaddr, npages, enc);
}
417+
364418
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
365419
bool force_dma_unencrypted(struct device *dev)
366420
{

arch/x86/mm/pat/set_memory.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
20232023
*/
20242024
cpa_flush(&cpa, 0);
20252025

2026+
/*
2027+
* Notify hypervisor that a given memory range is mapped encrypted
2028+
* or decrypted.
2029+
*/
2030+
notify_range_enc_status_changed(addr, numpages, enc);
2031+
20262032
return ret;
20272033
}
20282034

include/linux/efi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
362362

363363
/* OEM GUIDs */
364364
#define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
365+
#define AMD_SEV_MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
365366

366367
typedef struct {
367368
efi_guid_t guid;

0 commit comments

Comments
 (0)