Skip to content

Commit 3074152

Browse files
ashkalrabp3tk0v
authored and committed
x86/sev: Convert shared memory back to private on kexec
SNP guests allocate shared buffers to perform I/O. It is done by allocating pages normally from the buddy allocator and converting them to shared with set_memory_decrypted(). The second, kexec-ed, kernel has no idea what memory is converted this way. It only sees E820_TYPE_RAM. Accessing shared memory via private mapping will cause unrecoverable RMP page-faults. On kexec, walk direct mapping and convert all shared memory back to private. It makes all RAM private again and second kernel may use it normally. Additionally, for SNP guests, convert all bss decrypted section pages back to private. The conversion occurs in two steps: stopping new conversions and unsharing all memory. In the case of normal kexec, the stopping of conversions takes place while scheduling is still functioning. This allows for waiting until any ongoing conversions are finished. The second step is carried out when all CPUs except one are inactive and interrupts are disabled. This prevents any conflicts with code that may access shared memory. Co-developed-by: Borislav Petkov (AMD) <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Signed-off-by: Ashish Kalra <[email protected]> Reviewed-by: Tom Lendacky <[email protected]> Link: https://lore.kernel.org/r/05a8c15fb665dbb062b04a8cb3d592a63f235937.1722520012.git.ashish.kalra@amd.com
1 parent 2a78306 commit 3074152

File tree

3 files changed

+137
-0
lines changed

3 files changed

+137
-0
lines changed

arch/x86/coco/sev/core.c

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,137 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
954954
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
955955
}
956956

957+
static void set_pte_enc(pte_t *kpte, int level, void *va)
958+
{
959+
struct pte_enc_desc d = {
960+
.kpte = kpte,
961+
.pte_level = level,
962+
.va = va,
963+
.encrypt = true
964+
};
965+
966+
prepare_pte_enc(&d);
967+
set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
968+
}
969+
/*
 * Convert all memory that was shared with the hypervisor back to private,
 * so a kexec-ed kernel (which only sees E820_TYPE_RAM) can safely access
 * it through encrypted mappings.
 *
 * Walks the direct mapping and the bss-decrypted section; per-CPU GHCB
 * pages are deliberately skipped here and converted last by the caller
 * (snp_kexec_finish()), so GHCB-based hypervisor calls keep working for
 * as long as possible.
 *
 * Runs late in kexec shutdown; callers are expected to have stopped new
 * conversions beforehand (snp_kexec_begin()).
 */
static void unshare_all_memory(void)
{
	unsigned long addr, end, size, ghcb;
	struct sev_es_runtime_data *data;
	unsigned int npages, level;
	bool skipped_addr;
	pte_t *pte;
	int cpu;

	/* Unshare the direct mapping. */
	addr = PAGE_OFFSET;
	end = PAGE_OFFSET + get_max_mapped();

	while (addr < end) {
		pte = lookup_address(addr, &level);
		/* Step size follows the mapping level (4K/2M/1G). */
		size = page_level_size(level);
		npages = size / PAGE_SIZE;
		skipped_addr = false;

		/* Only mappings currently marked decrypted need conversion. */
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
			addr += size;
			continue;
		}

		/*
		 * Ensure that all the per-CPU GHCBs are made private at the
		 * end of the unsharing loop so that the switch to the slower
		 * MSR protocol happens last.
		 */
		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			/* Skip this range if it contains a per-CPU GHCB. */
			if (addr <= ghcb && ghcb <= addr + size) {
				skipped_addr = true;
				break;
			}
		}

		if (!skipped_addr) {
			/* Flip the PTE first, then tell the HV via PSC. */
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, npages);
		}
		addr += size;
	}

	/* Unshare all bss decrypted memory. */
	addr = (unsigned long)__start_bss_decrypted;
	end = (unsigned long)__start_bss_decrypted_unused;
	npages = (end - addr) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = lookup_address(addr, &level);
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
			continue;

		set_pte_enc(pte, level, (void *)addr);
	}
	/*
	 * One page-state change for the whole section; covers the full
	 * range even though some PTEs above may have been skipped.
	 */
	addr = (unsigned long)__start_bss_decrypted;
	snp_set_memory_private(addr, npages);

	/* Make the re-encrypted mappings visible everywhere. */
	__flush_tlb_all();
}
1034+
/* Stop new private<->shared conversions */
1035+
void snp_kexec_begin(void)
1036+
{
1037+
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1038+
return;
1039+
1040+
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
1041+
return;
1042+
1043+
/*
1044+
* Crash kernel ends up here with interrupts disabled: can't wait for
1045+
* conversions to finish.
1046+
*
1047+
* If race happened, just report and proceed.
1048+
*/
1049+
if (!set_memory_enc_stop_conversion())
1050+
pr_warn("Failed to stop shared<->private conversions\n");
1051+
}
1052+
/*
 * Final kexec step for SNP guests: convert all remaining shared memory
 * back to private.
 *
 * Runs when all CPUs but one are offline and interrupts are disabled,
 * so nothing can concurrently touch shared memory. unshare_all_memory()
 * converts everything except the per-CPU GHCB pages, which are handled
 * last here via the MSR protocol.
 */
void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned int level, cpu;
	unsigned long size;
	struct ghcb *ghcb;
	pte_t *pte;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	unshare_all_memory();

	/*
	 * Switch to using the MSR protocol to change per-CPU GHCBs to
	 * private. All the per-CPU GHCBs have been switched back to private,
	 * so can't do any more GHCB calls to the hypervisor beyond this point
	 * until the kexec'ed kernel starts running.
	 */
	boot_ghcb = NULL;
	sev_cfg.ghcbs_initialized = false;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		/* Convert the whole mapping-level-sized range at once. */
		size = page_level_size(level);
		set_pte_enc(pte, level, (void *)ghcb);
		snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
	}
}
9571088
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
9581089
{
9591090
int ret;

arch/x86/include/asm/sev.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,8 @@ void sev_show_status(void);
455455
void snp_update_svsm_ca(void);
456456
int prepare_pte_enc(struct pte_enc_desc *d);
457457
void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
458+
void snp_kexec_finish(void);
459+
void snp_kexec_begin(void);
458460

459461
#else /* !CONFIG_AMD_MEM_ENCRYPT */
460462

@@ -494,6 +496,8 @@ static inline void sev_show_status(void) { }
494496
static inline void snp_update_svsm_ca(void) { }
495497
static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
496498
static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
499+
static inline void snp_kexec_finish(void) { }
500+
static inline void snp_kexec_begin(void) { }
497501

498502
#endif /* CONFIG_AMD_MEM_ENCRYPT */
499503

arch/x86/mm/mem_encrypt_amd.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ void __init sme_early_init(void)
490490
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
491491
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
492492
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
493+
x86_platform.guest.enc_kexec_begin = snp_kexec_begin;
494+
x86_platform.guest.enc_kexec_finish = snp_kexec_finish;
493495

494496
/*
495497
* AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the

0 commit comments

Comments
 (0)