diff --git a/Microsoft/x64-cvm.config b/Microsoft/x64-cvm.config index 054a91783c73..b5ae5b1f1787 100644 --- a/Microsoft/x64-cvm.config +++ b/Microsoft/x64-cvm.config @@ -3,5 +3,6 @@ CONFIG_VIRT_DRIVERS=y CONFIG_TDX_GUEST_DRIVER=y CONFIG_SEV_GUEST=y CONFIG_AMD_MEM_ENCRYPT=y +CONFIG_AMD_SECURE_AVIC=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_LIB_AES=y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 07ee295368d4..509b063af66e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -473,6 +473,18 @@ config X86_X2APIC If you don't know what to do here, say N. +config AMD_SECURE_AVIC + bool "AMD Secure AVIC" + depends on X86_X2APIC && AMD_MEM_ENCRYPT + help + This enables AMD Secure AVIC support on guests that have this feature. + + AMD Secure AVIC provides hardware acceleration for performance sensitive + APIC accesses and support for managing guest owned APIC state for SEV-SNP + guests. + + If you don't know what to do here, say N. + config X86_POSTED_MSI bool "Enable MSI and MSI-x delivery by posted interrupts" depends on X86_64 && IRQ_REMAP diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index af39855a390a..6379edf5d97a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -394,6 +394,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) MSR_AMD64_SNP_VMSA_REG_PROT | \ MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ + MSR_AMD64_SNP_SECURE_AVIC_ENABLED | \ MSR_AMD64_SNP_RESERVED_MASK) /* @@ -401,7 +402,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP +#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | MSR_AMD64_SNP_SECURE_AVIC_ENABLED) u64 snp_get_unsupported_features(u64 status) { diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 0f81f70aca82..4c3bc031e9a9 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -100,6 +100,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_HOST_SEV_SNP: return cc_flags.host_sev_snp; + case CC_ATTR_SNP_SECURE_AVIC: + return sev_status & MSR_AMD64_SNP_SECURE_AVIC_ENABLED; + default: return false; } diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index a0b73e6ed747..a1eadbcbbe9e 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1181,6 +1181,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + vmsa->vintr_ctrl |= (V_GIF_MASK | V_NMI_ENABLE_MASK); + /* SVME must be set. */ vmsa->efer = EFER_SVME; @@ -1322,18 +1325,15 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd) return 0; } -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +static enum es_result __vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) { struct pt_regs *regs = ctxt->regs; + u64 exit_info_1 = write ? 1 : 0; enum es_result ret; - u64 exit_info_1; - - /* Is it a WRMSR? */ - exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 
1 : 0; if (regs->cx == MSR_SVSM_CAA) { /* Writes to the SVSM CAA msr are ignored */ - if (exit_info_1) + if (write) return ES_OK; regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); @@ -1343,14 +1343,14 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) } ghcb_set_rcx(ghcb, regs->cx); - if (exit_info_1) { + if (write) { ghcb_set_rax(ghcb, regs->ax); ghcb_set_rdx(ghcb, regs->dx); } ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); - if ((ret == ES_OK) && (!exit_info_1)) { + if (ret == ES_OK && !write) { regs->ax = ghcb->save.rax; regs->dx = ghcb->save.rdx; } @@ -1358,6 +1358,81 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + return __vc_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30); +} + +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) +{ + struct pt_regs regs = { .cx = msr }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + unsigned long flags; + enum es_result ret; + struct ghcb *ghcb; + + local_irq_save(flags); + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ret = __vc_handle_msr(ghcb, &ctxt, false); + if (ret == ES_OK) + *value = regs.ax | regs.dx << 32; + + __sev_put_ghcb(&state); + local_irq_restore(flags); + + return ret; +} + +enum es_result sev_ghcb_msr_write(u64 msr, u64 value) +{ + struct pt_regs regs = { + .cx = msr, + .ax = lower_32_bits(value), + .dx = upper_32_bits(value) + }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + unsigned long flags; + enum es_result ret; + struct ghcb *ghcb; + + local_irq_save(flags); + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ret = __vc_handle_msr(ghcb, &ctxt, true); + + __sev_put_ghcb(&state); + local_irq_restore(flags); + + return ret; +} + +enum es_result sev_notify_savic_gpa(u64 gpa) +{ + struct ghcb_state state; + struct es_em_ctxt ctxt; + unsigned long flags; + struct ghcb *ghcb; + int ret = 0; + + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + + ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SECURE_AVIC_GPA, gpa, 0); + + __sev_put_ghcb(&state); + + local_irq_restore(flags); + return ret; +} + static void snp_register_per_cpu_ghcb(void) { struct sev_es_runtime_data *data; diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 0569f579338b..34987b223418 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -288,6 +288,9 @@ static void hv_send_ipi_self(int vector) void __init hv_apic_init(void) { + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { pr_info("Hyper-V: Using IPI hypercalls\n"); /* diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a44c60c105f8..bf20e655076f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -40,6 +40,7 @@ void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); +void *hv_vp_early_input_arg; union hv_ghcb * __percpu *hv_ghcb_pg; /* Storage to save the hypercall page temporarily for hibernation */ @@ -84,6 +85,17 @@ static int hv_cpu_init(unsigned int cpu) if (ret) return ret; + /* Allow Hyper-V vector to be injected from Hypervisor. 
*/ + if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) + x2apic_savic_update_vector(cpu, + HYPERV_REENLIGHTENMENT_VECTOR, true); + + if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) + x2apic_savic_update_vector(cpu, + HYPERV_STIMER0_VECTOR, true); + + x2apic_savic_update_vector(cpu, HYPERVISOR_CALLBACK_VECTOR, true); + return hyperv_init_ghcb(); } @@ -357,6 +369,7 @@ void __init hyperv_init(void) u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; int cpuhp; + int ret; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -364,6 +377,22 @@ void __init hyperv_init(void) if (hv_common_init()) return; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) { + hv_vp_early_input_arg = kcalloc(num_possible_cpus(), + PAGE_SIZE, + GFP_KERNEL); + if (hv_vp_early_input_arg) { + ret = set_memory_decrypted((u64)hv_vp_early_input_arg, + num_possible_cpus()); + if (ret) { + kfree(hv_vp_early_input_arg); + goto common_free; + } + } else { + goto common_free; + } + } + /* * The VP assist page is useless to a TDX guest: the only use we * would have for it is lazy EOI, which can not be used with TDX. @@ -378,7 +407,7 @@ void __init hyperv_init(void) ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; if (!hv_isolation_type_tdx()) - goto common_free; + goto free_vp_early_input_arg; } if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { @@ -538,6 +567,10 @@ void __init hyperv_init(void) free_vp_assist_page: kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; +free_vp_early_input_arg: + set_memory_encrypted((u64)hv_vp_early_input_arg, num_possible_cpus()); + kfree(hv_vp_early_input_arg); + hv_vp_early_input_arg = NULL; common_free: hv_common_free(); } diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 4bd2e881e9e7..03b439ac54a3 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -289,6 +289,43 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } +enum es_result hv_set_savic_backing_page(u64 gfn) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_SET_VP_REGISTERS; + struct hv_set_vp_registers_input *input + = hv_vp_early_input_arg + smp_processor_id() * PAGE_SIZE; + union hv_x64_register_sev_gpa_page value; + unsigned long flags; + int retry = 5; + u64 ret; + + local_irq_save(flags); + + value.enabled = 1; + value.reserved = 0; + value.pagenumber = gfn; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = ms_hyperv.vtl; + input->element[0].name = HV_X64_REGISTER_SEV_AVIC_GPA; + input->element[0].value.reg64 = value.u64; + + do { + ret = hv_do_hypercall(control, input, NULL); + } while (ret == HV_STATUS_TIME_OUT && retry--); + if (!hv_result_success(ret)) + pr_err("Failed to set secure AVIC backing page %llx.\n", ret); + + local_irq_restore(flags); + + if (hv_result_success(ret)) + return ES_OK; + else + return ES_VMM_ERROR; +} + int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 33f677e2db75..e504b5e597ed 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -241,6 +241,16 @@ static inline u64 native_x2apic_icr_read(void) return val; } +#if defined(CONFIG_AMD_SECURE_AVIC) +extern void x2apic_savic_update_vector(unsigned int cpu, + unsigned int vector, + bool set); +extern void 
x2apic_savic_init_backing_page(void *backing_page); +#else +static inline void x2apic_savic_update_vector(unsigned int cpu, + unsigned int vector, bool set) { } +#endif + extern int x2apic_mode; extern int x2apic_phys; extern void __init x2apic_set_max_apicid(u32 apicid); @@ -305,6 +315,7 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); + void (*setup)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); void (*init_apic_ldr)(void); @@ -317,6 +328,8 @@ struct apic { /* wakeup secondary CPU using 64-bit wakeup point */ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); + char *name; }; diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 094106b6a538..be39a543fbe5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -135,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index a0c992faa1e9..910b03d74c85 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -311,6 +311,14 @@ enum hv_isolation_type { #define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG) #define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT) +/* + * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and + * there is not associated MSR address. + */ +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) +#define HV_X64_REGISTER_SEV_AVIC_GPA 0x00090043 + /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { VMBUS_PAGE_NOT_VISIBLE = 0, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6e94e4e8230c..421cc6d3d0ac 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -43,6 +43,7 @@ static inline unsigned char hv_get_nmi_reason(void) extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; +extern void *hv_vp_early_input_arg; extern u64 hv_current_partition_id; @@ -160,7 +161,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) : "cc", "edi", "esi"); } #endif - return hv_status; + return hv_status; } static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) @@ -265,6 +266,7 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); +enum es_result hv_set_savic_backing_page(u64 gfn); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ae84c3b8e6d..4b219f089528 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -680,8 +680,15 @@ #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) #define MSR_AMD64_SNP_SMT_PROT_BIT 17 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) -#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 
+#define MSR_AMD64_SNP_SECURE_AVIC_ENABLED BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SECURE_AVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SECURE_AVIC_EN_BIT 0 +#define MSR_AMD64_SECURE_AVIC_EN BIT_ULL(MSR_AMD64_SECURE_AVIC_EN_BIT) +#define MSR_AMD64_SECURE_AVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SECURE_AVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SECURE_AVIC_ALLOWEDNMI_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 833954e5aade..89bc19d3f6c1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -109,7 +109,12 @@ struct rmp_state { u32 asid; } __packed; -#define RMPADJUST_VMSA_PAGE_BIT BIT(16) +/* Target VMPL takes the first byte */ +#define RMPADJUST_ENABLE_READ BIT(8) +#define RMPADJUST_ENABLE_WRITE BIT(9) +#define RMPADJUST_USER_EXECUTE BIT(10) +#define RMPADJUST_KERNEL_EXECUTE BIT(11) +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) /* SNP Guest message request */ struct snp_req_data { @@ -414,6 +419,9 @@ u64 sev_get_status(void); void sev_show_status(void); void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); +enum es_result sev_notify_savic_gpa(u64 gpa); +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value); +enum es_result sev_ghcb_msr_write(u64 msr, u64 value); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -451,6 +459,9 @@ static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } +static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } +static inline enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) { return ES_UNSUPPORTED; } +static inline enum es_result sev_ghcb_msr_write(u64 msr, u64 value) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 2b59b9951c90..d5207e9badd3 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -5,7 +5,8 @@ #include #include -#include +/* TODO: including into mshv_vtl_main.c breaks the build. */ +// #include /* * 32-bit intercept words in the VMCB Control Area, starting @@ -164,7 +165,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * for use by hypervisor/software. */ union { - struct hv_vmcb_enlightenments hv_enlightenments; + /* TODO: including into mshv_vtl_main.c breaks the build. 
*/ + // struct hv_vmcb_enlightenments hv_enlightenments; u8 reserved_sw[32]; }; }; @@ -183,6 +185,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_GIF_SHIFT 9 #define V_GIF_MASK (1 << V_GIF_SHIFT) +#define V_INT_SHADOW 10 +#define V_INT_SHADOW_MASK (1 << V_INT_SHADOW) + #define V_NMI_PENDING_SHIFT 11 #define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT) @@ -195,6 +200,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_GUEST_BUSY_SHIFT 63 +#define V_GUEST_BUSY_MASK (1ULL << V_GUEST_BUSY_SHIFT) + #define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) #define V_INTR_MASKING_SHIFT 24 diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..89876c35dd11 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -94,8 +94,10 @@ #define SVM_EXIT_CR13_WRITE_TRAP 0x09d #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f -#define SVM_EXIT_INVPCID 0x0a2 -#define SVM_EXIT_NPF 0x400 +#define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUSLOCK 0x0a5 +#define SVM_EXIT_IDLE_HLT 0x0a6 +#define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 #define SVM_EXIT_VMGEXIT 0x403 @@ -116,6 +118,7 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SECURE_AVIC_GPA 0x8000001a #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 3bf0487cf3b7..12153993c12b 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -18,6 +18,7 @@ ifeq ($(CONFIG_X86_64),y) # APIC probe will depend on the listing order here obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o obj-$(CONFIG_X86_UV) += x2apic_uv_x.o +obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o obj-y += apic_flat_64.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c5fb28e6451a..95ae177dff88 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -591,6 +591,10 @@ static void setup_APIC_timer(void) 0xF, ~0UL); } else clockevents_register_device(levt); + + if (apic->update_vector) + apic->update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, + true); } /* @@ -1504,6 +1508,8 @@ static void setup_local_APIC(void) return; } + if (apic->setup) + apic->setup(); /* * If this comes from kexec/kcrash the APIC might be enabled in * SPIV. Soft disable it before doing further initialization. 
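The apic.c hunk above and the vector.c hunks that follow treat the new ->setup() and ->update_vector() members of struct apic as optional: only the Secure AVIC driver installs them, so every new call site checks for NULL first. A minimal sketch of that call-site pattern, assuming the kernel's global struct apic *apic from <asm/apic.h>; the wrapper name is hypothetical and the patch open-codes the check at each call site instead:

#include <asm/apic.h>

/* Illustrative sketch only, not part of the patch. */
static inline void apic_update_vector_if_present(unsigned int cpu,
						 unsigned int vector, bool set)
{
	/* ->update_vector stays NULL unless the Secure AVIC driver is active. */
	if (apic->update_vector)
		apic->update_vector(cpu, vector, set);
}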
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 557318145038..5aa65a732b05 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -174,6 +174,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->prev_cpu = apicd->cpu; WARN_ON_ONCE(apicd->cpu == newcpu); } else { + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, false); irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, managed); } @@ -183,6 +185,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, true); } static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -528,11 +532,15 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, if (irqd_is_activated(irqd)) { trace_vector_setup(virq, true, 0); apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, true); } else { /* Release the vector */ apicd->can_reserve = true; irqd_set_can_reserve(irqd); clear_irq_vector(irqd); + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, false); realloc = true; } raw_spin_unlock_irqrestore(&vector_lock, flags); diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c new file mode 100644 index 000000000000..9c6181229165 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure AVIC Support (SEV-SNP Guests) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Author: Kishon Vijay Abraham I + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "local.h" + +#define VEC_POS(v) ((v) & (32 - 1)) +#define REG_POS(v) (((v) >> 5) << 4) + +static DEFINE_PER_CPU(void *, apic_backing_page); +static DEFINE_PER_CPU(bool, savic_setup_done); + +enum lapic_lvt_entry { + LVT_TIMER, + LVT_THERMAL_MONITOR, + LVT_PERFORMANCE_COUNTER, + LVT_LINT0, + LVT_LINT1, + LVT_ERROR, + + APIC_MAX_NR_LVT_ENTRIES, +}; + +#define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) + +static inline void savic_wr_control_msr(u64 val) +{ + native_wrmsr(MSR_AMD64_SECURE_AVIC_CONTROL, lower_32_bits(val), upper_32_bits(val)); +} + +static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); +} + +static inline u32 get_reg(char *page, int reg_off) +{ + return READ_ONCE(*((u32 *)(page + reg_off))); +} + +static inline void set_reg(char *page, int reg_off, u32 val) +{ + WRITE_ONCE(*((u32 *)(page + reg_off)), val); +} + +static u32 read_msr_from_hv(u32 reg) +{ + u64 data, msr; + int ret; + + msr = APIC_BASE_MSR + (reg >> 4); + ret = sev_ghcb_msr_read(msr, &data); + if (ret != ES_OK) { + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); + /* MSR read failures are treated as fatal errors */ + snp_abort(); + } + + return lower_32_bits(data); +} + +static void write_msr_to_hv(u32 reg, u64 data) +{ + u64 msr; + int ret; + + msr = APIC_BASE_MSR + (reg >> 4); + ret = sev_ghcb_msr_write(msr, data); + if (ret != ES_OK) { + pr_err("Secure AVIC msr (%#llx) write returned error (%d)\n", msr, ret); + /* MSR writes should never fail. 
Any failure is fatal error for SNP guest */ + snp_abort(); + } +} + +#define SAVIC_ALLOWED_IRR_OFFSET 0x204 + +static u32 x2apic_savic_read(u32 reg) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TMCCT: + case APIC_TDCR: + return read_msr_from_hv(reg); + case APIC_ID: + case APIC_LVR: + case APIC_TASKPRI: + case APIC_ARBPRI: + case APIC_PROCPRI: + case APIC_LDR: + case APIC_SPIV: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + case APIC_EFEAT: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + return get_reg(backing_page, reg); + case APIC_ISR ... APIC_ISR + 0x70: + case APIC_TMR ... APIC_TMR + 0x70: + WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg); + return get_reg(backing_page, reg); + /* IRR and ALLOWED_IRR offset range */ + case APIC_IRR ... APIC_IRR + 0x74: + /* + * Either aligned at 16 bytes for valid IRR reg offset or a + * valid Secure AVIC ALLOWED_IRR offset. + */ + WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)), + "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg); + return get_reg(backing_page, reg); + default: + pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg); + return 0; + } +} + +#define SAVIC_NMI_REQ_OFFSET 0x278 + +static void x2apic_savic_write(u32 reg, u32 data) +{ + void *backing_page = this_cpu_read(apic_backing_page); + unsigned int cfg; + + switch (reg) { + case APIC_LVTT: + case APIC_LVT0: + case APIC_LVT1: + case APIC_TMICT: + case APIC_TDCR: + write_msr_to_hv(reg, data); + break; + /* APIC_ID is writable and configured by guest for Secure AVIC */ + case APIC_ID: + case APIC_TASKPRI: + case APIC_EOI: + case APIC_SPIV: + case SAVIC_NMI_REQ_OFFSET: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVTERR: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + set_reg(backing_page, reg, data); + break; + /* Self IPIs are accelerated by hardware, use wrmsr */ + case APIC_SELF_IPI: + cfg = __prepare_ICR(APIC_DEST_SELF, data, 0); + native_x2apic_icr_write(cfg, 0); + break; + /* ALLOWED_IRR offsets are writable */ + case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: + if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { + set_reg(backing_page, reg, data); + break; + } + fallthrough; + default: + pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg); + } +} + +static void send_ipi(int cpu, int vector, bool nmi) +{ + void *backing_page; + int reg_off; + + backing_page = per_cpu(apic_backing_page, cpu); + reg_off = APIC_IRR + REG_POS(vector); + /* + * Use test_and_set_bit() to ensure that IRR updates are atomic w.r.t. other + * IRR updates such as during VMRUN and during CPU interrupt handling flow. 
+ */ + test_and_set_bit(VEC_POS(vector), (unsigned long *)((char *)backing_page + reg_off)); + if (nmi) + set_reg(backing_page, SAVIC_NMI_REQ_OFFSET, nmi); +} + +static void send_ipi_dest(u64 icr_data) +{ + int vector, cpu; + bool nmi; + + vector = icr_data & APIC_VECTOR_MASK; + cpu = icr_data >> 32; + nmi = ((icr_data & APIC_DM_FIXED_MASK) == APIC_DM_NMI); + + send_ipi(cpu, vector, nmi); +} + +static void send_ipi_target(u64 icr_data) +{ + if (icr_data & APIC_DEST_LOGICAL) { + pr_err("IPI target should be of PHYSICAL type\n"); + return; + } + + send_ipi_dest(icr_data); +} + +static void send_ipi_allbut(u64 icr_data) +{ + const struct cpumask *self_cpu_mask = get_cpu_mask(smp_processor_id()); + unsigned long flags; + int vector, cpu; + bool nmi; + + vector = icr_data & APIC_VECTOR_MASK; + nmi = ((icr_data & APIC_DM_FIXED_MASK) == APIC_DM_NMI); + local_irq_save(flags); + for_each_cpu_andnot(cpu, cpu_present_mask, self_cpu_mask) + send_ipi(cpu, vector, nmi); + write_msr_to_hv(APIC_ICR, icr_data); + local_irq_restore(flags); +} + +static void send_ipi_allinc(u64 icr_data) +{ + int vector; + + send_ipi_allbut(icr_data); + vector = icr_data & APIC_VECTOR_MASK; + native_x2apic_icr_write(APIC_DEST_SELF | vector, 0); +} + +static void x2apic_savic_icr_write(u32 icr_low, u32 icr_high) +{ + int dsh, vector; + u64 icr_data; + + icr_data = ((u64)icr_high) << 32 | icr_low; + dsh = icr_low & APIC_DEST_ALLBUT; + + switch (dsh) { + case APIC_DEST_SELF: + vector = icr_data & APIC_VECTOR_MASK; + x2apic_savic_write(APIC_SELF_IPI, vector); + break; + case APIC_DEST_ALLINC: + send_ipi_allinc(icr_data); + break; + case APIC_DEST_ALLBUT: + send_ipi_allbut(icr_data); + break; + default: + send_ipi_target(icr_data); + write_msr_to_hv(APIC_ICR, icr_data); + } +} + +static void __send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned int cfg = __prepare_ICR(0, vector, dest); + + x2apic_savic_icr_write(cfg, apicid); +} + +static void x2apic_savic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + __send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); +} + +static void +__send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) +{ + unsigned long query_cpu; + unsigned long this_cpu; + unsigned long flags; + + local_irq_save(flags); + + this_cpu = smp_processor_id(); + for_each_cpu(query_cpu, mask) { + if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) + continue; + __send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, + APIC_DEST_PHYSICAL); + } + + local_irq_restore(flags); +} + +static void x2apic_savic_send_IPI_mask(const struct cpumask *mask, int vector) +{ + __send_IPI_mask(mask, vector, APIC_DEST_ALLINC); +} + +static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); +} + +static void __send_IPI_shorthand(int vector, u32 which) +{ + unsigned int cfg = __prepare_ICR(which, vector, 0); + + x2apic_savic_icr_write(cfg, 0); +} + +static void x2apic_savic_send_IPI_allbutself(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_ALLBUT); +} + +static void x2apic_savic_send_IPI_all(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_ALLINC); +} + +static void x2apic_savic_send_IPI_self(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_SELF); +} + +void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + void *backing_page; + unsigned long *reg; + int reg_off; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + 
return; + + backing_page = per_cpu(apic_backing_page, cpu); + reg_off = SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector); + reg = (unsigned long *)((char *)backing_page + reg_off); + + if (set) + test_and_set_bit(VEC_POS(vector), reg); + else + test_and_clear_bit(VEC_POS(vector), reg); +} + +void x2apic_savic_init_backing_page(void *backing_page) +{ + u32 hv_apic_id; + u32 apic_id; + u32 val; + int i; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + + val = read_msr_from_hv(APIC_LVR); + set_reg(backing_page, APIC_LVR, val); + + /* + * Hypervisor is used for all timer related functions, + * so don't copy those values. + */ + for (i = LVT_THERMAL_MONITOR; i < APIC_MAX_NR_LVT_ENTRIES; i++) { + val = read_msr_from_hv(APIC_LVTx(i)); + set_reg(backing_page, APIC_LVTx(i), val); + } + + val = read_msr_from_hv(APIC_LVT0); + set_reg(backing_page, APIC_LVT0, val); + + val = read_msr_from_hv(APIC_LDR); + set_reg(backing_page, APIC_LDR, val); + + /* Read APIC ID from Extended Topology Enumeration CPUID */ + apic_id = cpuid_edx(0x0000000b); + hv_apic_id = read_msr_from_hv(APIC_ID); + WARN_ONCE(hv_apic_id != apic_id, "Inconsistent APIC_ID values: %d (cpuid), %d (msr)", + apic_id, hv_apic_id); + set_reg(backing_page, APIC_ID, apic_id); +} + +static void x2apic_savic_setup(void) +{ + void *backing_page; + enum es_result ret; + unsigned long gpa; + unsigned long gfn; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + + if (this_cpu_read(savic_setup_done)) + return; + + backing_page = this_cpu_read(apic_backing_page); + x2apic_savic_init_backing_page(backing_page); + gpa = __pa(backing_page); + gfn = gpa >> PAGE_SHIFT; + + if (hv_isolation_type_snp()) + ret = hv_set_savic_backing_page(gfn); + else + ret = sev_notify_savic_gpa(gpa); + + if (ret != ES_OK) + snp_abort(); + savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_EN | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); + this_cpu_write(savic_setup_done, true); +} + +static int x2apic_savic_probe(void) +{ + void *backing_pages; + unsigned int cpu; + size_t sz; + int i; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return 0; + + if (!x2apic_mode) { + pr_err("Secure AVIC enabled in non x2APIC mode\n"); + snp_abort(); + } + + sz = ALIGN(num_possible_cpus() * SZ_4K, SZ_2M); + backing_pages = kzalloc(sz, GFP_ATOMIC); + if (!backing_pages) + snp_abort(); + + i = 0; + for_each_possible_cpu(cpu) { + per_cpu(apic_backing_page, cpu) = backing_pages + i * SZ_4K; + i++; + } + + pr_info("Secure AVIC Enabled\n"); + + return 1; +} + +static struct apic apic_x2apic_savic __ro_after_init = { + + .name = "secure avic x2apic", + .probe = x2apic_savic_probe, + .acpi_madt_oem_check = x2apic_savic_acpi_madt_oem_check, + .setup = x2apic_savic_setup, + + .dest_mode_logical = false, + + .disable_esr = 0, + + .cpu_present_to_apicid = default_cpu_present_to_apicid, + + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, + .get_apic_id = x2apic_get_apic_id, + + .calc_dest_apicid = apic_default_calc_apicid, + + .send_IPI = x2apic_savic_send_IPI, + .send_IPI_mask = x2apic_savic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_savic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_savic_send_IPI_allbutself, + .send_IPI_all = x2apic_savic_send_IPI_all, + .send_IPI_self = x2apic_savic_send_IPI_self, + .nmi_to_offline_cpu = true, + + .read = x2apic_savic_read, + .write = x2apic_savic_write, + .eoi = native_apic_msr_eoi, + .icr_read = native_x2apic_icr_read, + .icr_write = x2apic_savic_icr_write, + + .update_vector = x2apic_savic_update_vector, +}; + 
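A note on the register arithmetic used throughout x2apic_savic.c above: IRR, ISR, TMR and the Secure AVIC ALLOWED_IRR bitmaps are banked as eight 32-bit registers with a 16-byte stride, which is exactly what the VEC_POS()/REG_POS() macros encode. A small standalone sketch (illustrative user-space program, not kernel code) of how a vector maps to a backing-page offset and bit, using the x86 local timer vector 0xec as an example:

#include <stdio.h>

/* Constants as defined by apicdef.h and x2apic_savic.c. */
#define VEC_POS(v)			((v) & (32 - 1))
#define REG_POS(v)			(((v) >> 5) << 4)
#define APIC_IRR			0x200
#define SAVIC_ALLOWED_IRR_OFFSET	0x204

int main(void)
{
	unsigned int vec = 0xec;	/* LOCAL_TIMER_VECTOR */

	/* Bank = vector / 32; each 32-bit bank starts on a 16-byte boundary. */
	printf("vector 0x%02x -> IRR offset 0x%03x, bit %u\n",
	       vec, APIC_IRR + REG_POS(vec), VEC_POS(vec));
	printf("vector 0x%02x -> ALLOWED_IRR offset 0x%03x, bit %u\n",
	       vec, SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vec), VEC_POS(vec));
	return 0;
}

For the timer vector this gives offset 0x270, bit 12 for IRR and 0x274, bit 12 for ALLOWED_IRR: the word send_ipi() updates on the target CPU's backing page, and the bit x2apic_savic_update_vector() sets to allow injection.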
+apic_driver(apic_x2apic_savic); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c6797cf9f37c..c2ae8ea213b0 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -460,6 +460,9 @@ static void __init ms_hyperv_init_platform(void) pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED; + /* * Check CPU management privilege. * diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index fc8729deb659..92003b8004f7 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,6 +20,9 @@ #include #include #include +#ifdef CONFIG_SEV_GUEST +#include +#endif #include #include "hyperv_vmbus.h" @@ -312,7 +315,9 @@ void hv_synic_enable_regs(unsigned int cpu) if (vmbus_irq != -1) enable_percpu_irq(vmbus_irq, 0); shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT); - +#ifdef CONFIG_SEV_GUEST + x2apic_savic_update_vector(smp_processor_id(), vmbus_interrupt, true); +#endif shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index b727c76d17b4..412c227bfd39 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -167,6 +168,9 @@ struct mshv_vtl_per_cpu { bool msrs_are_guest; struct user_return_notifier mshv_urn; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; +#endif }; static struct mutex mshv_vtl_poll_file_lock; @@ -196,20 +200,66 @@ static struct page *mshv_vtl_cpu_reg_page(int cpu) return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu); } -#if defined(CONFIG_X86_64) && defined(CONFIG_INTEL_TDX_GUEST) +#if defined(CONFIG_X86_64) + +#if defined(CONFIG_INTEL_TDX_GUEST) + +static struct page *tdx_this_apic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +} + +static u32 *mshv_tdx_vapic_irr(void) +{ + return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); +} + +#endif /* defined(CONFIG_INTEL_TDX_GUEST) */ static struct page *tdx_apic_page(int cpu) { +#if defined(CONFIG_INTEL_TDX_GUEST) return *per_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } -static struct page *tdx_this_apic_page(void) +static struct page *snp_secure_avic_page(int cpu) { - return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +#if defined(CONFIG_SEV_GUEST) + return *per_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } +static u32 *mshv_snp_secure_avic_irr(int cpu) +{ +#if defined(CONFIG_SEV_GUEST) + return (u32 *)((char *)page_address(snp_secure_avic_page(cpu)) + APIC_IRR); +#else + (void)cpu; + return NULL; +#endif +} + +static struct page* mshv_apic_page(int cpu) +{ + if (hv_isolation_type_tdx()) + return tdx_apic_page(cpu); + else if (hv_isolation_type_snp()) + return snp_secure_avic_page(cpu); + + return NULL; +} + +#if defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) /* - * For ICR emulation on TDX, we need a fast way to map APICIDs to CPUIDs. + * For ICR emulation when running a hardware isolated guest, we need a fast way to map + * APICIDs to CPUIDs. * Instead of iterating through all CPUs for each target in the ICR destination field * precompute a mapping. APICIDs can be sparse so we have to use a hash table. 
* Note: CPU hotplug is not supported (both by this code and by the paravisor in general) @@ -225,21 +275,250 @@ struct apicid_to_cpuid_entry { * Sets the cpu described by apicid in cpu_mask. * Returns 0 on success, -EINVAL if no cpu matches the apicid. */ -static int mshv_tdx_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) +static int mshv_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) { struct apicid_to_cpuid_entry *found; hash_for_each_possible(apicid_to_cpuid, found, node, apicid) { if (found->apicid != apicid) continue; - cpumask_set_cpu(found->cpuid, cpu_mask); return 0; } return -EINVAL; } -#endif + +/* + * Returns the cpumask described by dest, where dest is a logical destination. + * cpu_mask should have no CPUs set. + * Returns 0 on success + */ +static int mshv_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) +{ + int ret = 0; + + while ((u16)dest) { + const u16 i = fls((u16)dest) - 1; + const u32 physical_id = (dest >> 16 << 4) | i; + + ret = mshv_set_cpumask_from_apicid(physical_id, cpu_mask); + dest &= ~BIT(i); + if (ret) + break; + } + + return ret; +} + +/* + * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), + * is a hot path on hardware-isolated VMs. By performing some of the common functionality + * entirely in-kernel we eliminate costly user<->kernel transitions. + */ +static void mshv_free_apicid_to_cpuid_mapping(void) +{ + int bkt; + struct apicid_to_cpuid_entry *entry; + struct hlist_node *tmp; + + hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +/* + * Creates and populates the apicid_to_cpuid hash table. + * This mapping is used for fast ICR emulation on on hardware-isolated VMs. + * Returns 0 on success. + */ +static int mshv_create_apicid_to_cpuid_mapping(struct device *dev) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) { + struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) { + ret = -ENOMEM; + break; + } + + entry->apicid = cpuid_to_apicid[cpu]; + entry->cpuid = cpu; + + if (entry->apicid == BAD_APICID) { + dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); + ret = -ENODEV; + break; + } + + hash_add(apicid_to_cpuid, &entry->node, entry->apicid); + } + + if (ret) + mshv_free_apicid_to_cpuid_mapping(); + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + */ +static int mshv_cpu_mask_for_icr_write(u32 icr_lo, u32 dest, struct cpumask* local_mask) +{ + const u8 shorthand = (icr_lo >> 18) & 0b11; + const u32 self = smp_processor_id(); + int ret = 0; + + cpumask_clear(local_mask); + if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ + cpumask_copy(local_mask, cpu_online_mask); + } else if (shorthand == 0b11) { /* shorthand all but self */ + cpumask_copy(local_mask, cpu_online_mask); + cpumask_clear_cpu(self, local_mask); + } else if (shorthand == 0b01) { /* shorthand self */ + cpumask_set_cpu(self, local_mask); + } else if (icr_lo & BIT(11)) { /* logical */ + ret = mshv_get_logical_cpumask(dest, local_mask); + } else { /* physical */ + ret = mshv_set_cpumask_from_apicid(dest, local_mask); + } + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. 
+ */ +static int mshv_update_proxy_irr_for_icr_write(u32 icr_lo, struct cpumask *local_mask) +{ + const u8 vector = icr_lo; + const u64 bank = vector / 32; + const u32 mask = BIT(vector % 32); + const u32 self = smp_processor_id(); + + unsigned int cpu; + bool send_ipi; + + send_ipi = false; + for_each_cpu(cpu, local_mask) { + /* + * The kernel doesn't provide an atomic_or which operates on u32, + * so cast to atomic_t, which should have the same layout + */ + static_assert(sizeof(atomic_t) == sizeof(u32)); + atomic_or(mask, (atomic_t *) + (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); + smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); + send_ipi |= cpu != self; + } + + if (send_ipi) { + cpumask_clear_cpu(self, local_mask); + __apic_send_IPI_mask(local_mask, RESCHEDULE_VECTOR); + } + + return 0; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + * Secure AVIC accelerates self-IPI only. + */ +static int mshv_snp_handle_simple_icr_write(u32 icr_lo, u32 dest) +{ + const u8 vector = icr_lo; + + struct cpumask local_mask; + unsigned int cpu; + int ret; + + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); + if (ret) + return ret; + + // Probobaly shouldn't update the target VP's IRRs to inject the + // interrupt, there might be more state to account for. The target + // VP will go into the user mode anyway, not much to be saved? + + // for_each_cpu(cpu, &local_mask) { + // u64 irr_reg_off; + // unsigned long *irr_reg; + // void* irr; + + // /* + // * IRRs are banked into eight 32-bit registers each starting on the + // * 16-byte boundary (4 byte of an IRR + 12 byte stride). + // */ + // irr_reg_off = (vector >> 5) << 4; + // irr = mshv_snp_secure_avic_irr(cpu); + // irr_reg = (unsigned long*)((u8*)irr + irr_reg_off); + + // /* Inject the interrupt. */ + // test_and_set_bit(vector & 0x1f, irr_reg); + // } + + return 0; +} + +#else + +static void mshv_free_apicid_to_cpuid_mapping(void) {} +static int mshv_create_apicid_to_cpuid_mapping(struct device *) { return 0; } +static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *) { return false; } +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *) { return false; } + +#endif /* defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) */ + +/* + * Pull the interrupts in the `proxy_irr` field into the VAPIC page + * Returns true if an exit to user-space is required (sync tmr state) + */ +static bool __mshv_pull_proxy_irr(struct mshv_vtl_run *run, struct page *apic_page) +{ + u32 *apic_page_irr = (u32 *)((char *)page_address(apic_page) + APIC_IRR); + + if (!xchg(&run->scan_proxy_irr, 0) || !apic_page_irr) + return false; + + for (int i = 0; i < 8; i++) { + const u32 val = xchg(&run->proxy_irr[i], 0); + + if (!val) + continue; + + if (run->proxy_irr_exit_mask[i] & val) { + /* + * This vector was previously used for a level-triggered interrupt. + * An edge-triggered interrupt has now arrived, so we need to involve + * user-space to clear its copy of the tmr. + * Put the interrupt(s) back on the run page so it can do so. + * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write + */ + atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); + WRITE_ONCE(run->scan_proxy_irr, 1); + return true; + } + + /* + * IRR is non-contiguous. + * Each bank is 4 bytes with 12 bytes of padding between banks. 
+ */ + apic_page_irr[i * 4] |= val; + } + + return false; +} + +#endif /* defined(CONFIG_X86_64) */ static long __mshv_vtl_ioctl_check_extension(u32 arg) { @@ -320,7 +599,7 @@ static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu) } #ifdef CONFIG_X86_64 -static int mshv_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) +static int mshv_snp_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) { struct page *page; struct hv_register_assoc reg_assoc = {}; @@ -469,6 +748,7 @@ static void mshv_vtl_scan_proxy_interrupts(struct hv_per_cpu_context *per_cpu) } else { /* A malicious hypervisor might set a vector > 255. */ vector = READ_ONCE(proxy->u.asserted_vector) & 0xff; + const u32 bank = vector / 32; const u32 masked_irr = BIT(vector % 32) & ~READ_ONCE(run->proxy_irr_blocked[bank]); @@ -626,16 +906,43 @@ static int mshv_vtl_alloc_context(unsigned int cpu) mshv_write_tdx_apic_page(page_to_phys(tdx_apic_page)); #endif } else if (hv_isolation_type_snp()) { -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; + u64 apic_id; int ret; - ret = mshv_configure_vmsa_page(0, &per_cpu->vmsa_page); + ret = mshv_snp_configure_vmsa_page(0, &per_cpu->vmsa_page); if (ret < 0) return ret; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + goto synic; + + snp_secure_avic_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!snp_secure_avic_page) + return -ENOMEM; + + /* VMPL 2 for the VTL0 */ + ret = rmpadjust((unsigned long)page_address(snp_secure_avic_page), + RMP_PG_SIZE_4K, 2 | RMPADJUST_ENABLE_READ | RMPADJUST_ENABLE_WRITE); + if (ret) { + pr_err("failed to adjust RMP for the secure AVIC page: %d\n", ret); + free_page((u64)snp_secure_avic_page); + return -EINVAL; + } + + /* Some very basic initialization */ + // ret = sev_ghcb_msr_read(APIC_BASE_MSR + (APIC_ID >> 4), &apic_id); + // BUG_ON(ret != ES_OK); + // WRITE_ONCE(*((u32*)page_address(snp_secure_avic_page) + APIC_ID), lower_32_bits(apic_id)); + x2apic_savic_init_backing_page(page_address(snp_secure_avic_page)); // ??? + + per_cpu->snp_secure_avic_page = snp_secure_avic_page; #endif } else if (mshv_vsm_capabilities.intercept_page_available) mshv_vtl_configure_reg_page(per_cpu); +synic: mshv_vtl_synic_enable_regs(cpu); return 0; @@ -997,62 +1304,7 @@ static void mshv_vtl_idle(void) #define enter_mode(mode) ((mode) & MODE_MASK) #define reenter_mode(mode) (((mode) >> REENTER_SHIFT) & MODE_MASK) -/* - * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), - * is a hot path on TDX. By performing some of the common functionality entirely in-kernel - * we eliminate costly user<->kernel transitions. - */ -#ifndef CONFIG_INTEL_TDX_GUEST -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) {} -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *) { return 0; } -static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *) { return false; } -#else -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) -{ - int bkt; - struct apicid_to_cpuid_entry *entry; - struct hlist_node *tmp; - - hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { - hash_del(&entry->node); - kfree(entry); - } -} - -/* - * Creates and populates the apicid_to_cpuid hash table. - * This mapping is used for fast ICR emulation on TDX. - * Returns 0 on success. 
- */ -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *dev) -{ - int cpu, ret = 0; - - for_each_online_cpu(cpu) { - struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - - if (!entry) { - ret = -ENOMEM; - break; - } - - entry->apicid = cpuid_to_apicid[cpu]; - entry->cpuid = cpu; - - if (entry->apicid == BAD_APICID) { - dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); - ret = -ENODEV; - break; - } - - hash_add(apicid_to_cpuid, &entry->node, entry->apicid); - } - - if (ret) - mshv_tdx_free_apicid_to_cpuid_mapping(); - - return ret; -} +#ifdef CONFIG_INTEL_TDX_GUEST static void mshv_tdx_advance_to_next_instruction(struct tdx_vp_context *context) { @@ -1090,28 +1342,6 @@ static bool mshv_tdx_is_simple_icr_write(const struct tdx_vp_context *context) return fixed && edge; } -/* - * Returns the cpumask described by dest, where dest is a logical destination. - * cpu_mask should have no CPUs set. - * Returns 0 on success - */ -static int mshv_tdx_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) -{ - int ret = 0; - - while ((u16)dest) { - const u16 i = fls((u16)dest) - 1; - const u32 physical_id = (dest >> 16 << 4) | i; - - ret = mshv_tdx_set_cpumask_from_apicid(physical_id, cpu_mask); - dest &= ~BIT(i); - if (ret) - break; - } - - return ret; -} - /* * Attempts to handle an ICR write. Returns 0 if successful, other values * indicate user-space should be invoked to gracefully handle the error. @@ -1120,101 +1350,21 @@ static int mshv_tdx_handle_simple_icr_write(struct tdx_vp_context *context) { const u32 icr_lo = context->l2_enter_guest_state.rax; const u32 dest = context->l2_enter_guest_state.rdx; - const u8 shorthand = (icr_lo >> 18) & 0b11; - const u8 vector = icr_lo; - const u64 bank = vector / 32; - const u32 mask = BIT(vector % 32); - const u32 self = smp_processor_id(); - - bool send_ipi = false; struct cpumask local_mask = {}; - unsigned int cpu = 0; int ret = 0; - if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ - cpumask_copy(&local_mask, cpu_online_mask); - } else if (shorthand == 0b11) { /* shorthand all but self */ - cpumask_copy(&local_mask, cpu_online_mask); - cpumask_clear_cpu(self, &local_mask); - } else if (shorthand == 0b01) { /* shorthand self */ - cpumask_set_cpu(self, &local_mask); - } else if (icr_lo & BIT(11)) { /* logical */ - ret = mshv_tdx_get_logical_cpumask(dest, &local_mask); - } else { /* physical */ - ret = mshv_tdx_set_cpumask_from_apicid(dest, &local_mask); - } - + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); if (ret) return ret; - - for_each_cpu(cpu, &local_mask) { - /* - * The kernel doesn't provide an atomic_or which operates on u32, - * so cast to atomic_t, which should have the same layout - */ - static_assert(sizeof(atomic_t) == sizeof(u32)); - atomic_or(mask, (atomic_t *) - (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); - smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); - send_ipi |= cpu != self; - } - - if (send_ipi) { - cpumask_clear_cpu(self, &local_mask); - __apic_send_IPI_mask(&local_mask, RESCHEDULE_VECTOR); - } - mshv_tdx_advance_to_next_instruction(context); mshv_tdx_clear_exit_reason(context); return 0; } -static u32 *mshv_tdx_vapic_irr(void) -{ - return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); -} - -/* - * Pull the interrupts in the `proxy_irr` field into the VAPIC page - * Returns true if an exit to user-space is 
required (sync tmr state) - */ -static bool mshv_tdx_pull_proxy_irr(struct mshv_vtl_run *run) -{ - u32 *apic_page_irr = mshv_tdx_vapic_irr(); - - if (!xchg(&run->scan_proxy_irr, 0)) - return false; - - for (int i = 0; i < 8; i++) { - const u32 val = xchg(&run->proxy_irr[i], 0); - - if (!val) - continue; - - if (run->proxy_irr_exit_mask[i] & val) { - /* - * This vector was previously used for a level-triggered interrupt. - * An edge-triggered interrupt has now arrived, so we need to involve - * user-space to clear its copy of the tmr. - * Put the interrupt(s) back on the run page so it can do so. - * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write - */ - atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); - WRITE_ONCE(run->scan_proxy_irr, 1); - return true; - } - - /* - * IRR is non-contiguous. - * Each bank is 4 bytes with 12 bytes of padding between banks. - */ - apic_page_irr[i * 4] |= val; - } - - return false; -} - /* * Checks if exit reason is due: * - An interrupt for the L1 @@ -1334,6 +1484,179 @@ static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *run) } #endif /* CONFIG_INTEL_TDX_GUEST */ +#if defined(CONFIG_SEV_GUEST) + +static struct page *snp_this_savic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); +} + +static struct sev_es_save_area *snp_this_vmsa(void) +{ + struct page *vmsa_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page); + return page_address(vmsa_page); +} + +/* + * Sets a benign guest error code so that there won't be another + * #VMEXIT for the just processed one and marks the VMSA as + * runnable. + */ +static void mshv_snp_clear_exit_code(struct sev_es_save_area *vmsa, bool int_shadow) +{ + if (int_shadow) + vmsa->vintr_ctrl |= V_INT_SHADOW_MASK; + else + vmsa->vintr_ctrl &= ~V_INT_SHADOW_MASK; + vmsa->guest_exit_code = SVM_EXIT_INTR; + vmsa->vintr_ctrl &= ~V_GUEST_BUSY_MASK; +} + +/* + * Try to handle the incomplete IPI SEV-SNP exit. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. + */ +static bool mshv_snp_try_handle_incomplete_ipi(struct mshv_vtl_run *run, + struct sev_es_save_area *vmsa) +{ + u32 icr_lo = vmsa->guest_exit_info_1; + u32 dest = vmsa->guest_exit_info_1 >> 32; + + /* Route the INIT, SIPI, NMI to the user mode for now. */ + if ((icr_lo & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + return false; + /* Can handle only edge-triggered interrupts. */ + if (icr_lo & APIC_INT_LEVELTRIG) + return false; + + if (mshv_snp_handle_simple_icr_write(icr_lo, dest)) + return false; + + return true; +} + +/* + * Try to handle an SEV-SNP exit entirely in kernel, to avoid the overhead of a + * user<->kernel transition. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. 
+ */ +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *run) +{ + const bool intr_inject = MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT & run->offload_flags; + const bool x2apic = MSHV_VTL_OFFLOAD_FLAG_X2APIC & run->offload_flags; + struct sev_es_save_area *vmsa; + u8 *offload_flags; + + if (!intr_inject || !x2apic) + return false; + + vmsa = snp_this_vmsa(); + + switch (vmsa->guest_exit_code) + { + case SVM_EXIT_AVIC_INCOMPLETE_IPI: + if (mshv_snp_try_handle_incomplete_ipi(run, vmsa)) + goto handled; + break; + case SVM_EXIT_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + goto handled; + case SVM_EXIT_IDLE_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + goto handled; + case SVM_EXIT_MSR: + if (vmsa->rcx == HV_X64_MSR_GUEST_IDLE && !(vmsa->guest_exit_info_1 & 1)) { + /* The guest indicates it's idle by reading this synthetic MSR. */ + vmsa->rax = 0; + vmsa->rdx = 0; + vmsa->rip += 2; /* vmsa->guest_nrip might not be available although here it should be. */ + + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + + goto handled; + } + break; + default: + break; + } + + offload_flags = &run->offload_flags; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + if (!(*offload_flags & MSHV_VTL_OFFLOAD_FLAG_HALT_OTHER)) + run->flags &= ~MSHV_VTL_RUN_FLAG_HALTED; + + return false; + +handled: + + mshv_snp_clear_exit_code(vmsa, false); + return true; +} + +static bool mshv_snp_try_handle_intercept(struct mshv_vtl_run *run) +{ + struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()]; + u32 msg_type = HVMSG_NONE; + struct hv_message *msg = NULL; + + switch (hvp->vtl_entry_reason) { + case MSHV_ENTRY_REASON_INTERRUPT: + if (!mshv_vsm_capabilities.intercept_page_available) + { + struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context); + void *synic_message_page = mshv_cpu->synic_message_page; + + if (likely(synic_message_page)) + msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX; + } + break; + + case MSHV_ENTRY_REASON_INTERCEPT: + WARN_ON(!mshv_vsm_capabilities.intercept_page_available); + msg = (struct hv_message *)hvp->intercept_message; + break; + + default: + panic("unknown entry reason: %d", hvp->vtl_entry_reason); + } + + if (!msg) + return true; + msg_type = READ_ONCE(msg->header.message_type); + + switch (msg_type) { + case HVMSG_NONE: + break; + case HVMSG_X64_EXCEPTION_INTERCEPT: + { + struct hv_x64_exception_intercept_message *expt_msg = + (struct hv_x64_exception_intercept_message*)msg->u.payload; + if (expt_msg->exception_vector != X86_TRAP_VC) + return false; + } + break; + case HVMSG_SYNIC_SINT_DELIVERABLE: + return false; + case HVMSG_X64_HALT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + break; + default: + return false; + } + + return true; +} +#endif /* CONFIG_SEV_GUEST */ + /* * Attempts to directly inject the interrupts in the proxy_irr field. * Returns true if an exit to user-space is required. 
@@ -1342,14 +1665,20 @@ static bool mshv_pull_proxy_irr(struct mshv_vtl_run *run) { bool ret = READ_ONCE(run->scan_proxy_irr); - if (!hv_isolation_type_tdx() || - !(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) + if (!(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) return ret; + if (hv_isolation_type_tdx()) { #ifdef CONFIG_INTEL_TDX_GUEST - ret = mshv_tdx_pull_proxy_irr(run); - mshv_tdx_update_rvi_halt(run); + ret = __mshv_pull_proxy_irr(run, tdx_this_apic_page()); + mshv_tdx_update_rvi_halt(run); +#endif + } else if (hv_isolation_type_snp()) { +#ifdef CONFIG_SEV_GUEST + ret = __mshv_pull_proxy_irr(run, snp_this_savic_page()); #endif + } + return ret; } @@ -1423,6 +1752,10 @@ static int mshv_vtl_ioctl_return_to_lower_vtl(void) continue; /* Exit handled entirely in kernel */ else goto done; + } else if (hv_isolation_type_snp()) { + if (mshv_snp_try_handle_intercept(mshv_vtl_this_run()) && + mshv_snp_try_handle_exit(mshv_vtl_this_run())) + continue; /* Exit handled entirely in kernel */ } hvp = hv_vp_assist_page[smp_processor_id()]; @@ -1921,7 +2254,7 @@ static void guest_vsm_vmsa_pfn_this_cpu(void *arg) cpu = get_cpu(); vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); if (!vmsa_guest_vsm_page) { - if (mshv_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) + if (mshv_snp_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) *pfn = -ENOMEM; else vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); @@ -1952,6 +2285,41 @@ static long mshv_vtl_ioctl_guest_vsm_vmsa_pfn(void __user *user_arg) return ret; } + +static void secure_avic_vtl0_this_cpu(void *arg) +{ + int cpu; + struct page *snp_secure_avic_page; + u64 *pfn = arg; + + cpu = get_cpu(); + snp_secure_avic_page = *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); + put_cpu(); + + *pfn = snp_secure_avic_page ? page_to_pfn(snp_secure_avic_page) : -ENOMEM; +} + +static long mshv_vtl_ioctl_secure_avic_vtl0_pfn(void __user *user_arg) +{ + u64 pfn; + u32 cpu_id; + long ret; + + ret = copy_from_user(&cpu_id, user_arg, sizeof(cpu_id)) ? -EFAULT : 0; + if (ret) + return ret; + + ret = smp_call_function_single(cpu_id, secure_avic_vtl0_this_cpu, &pfn, true); + if (ret) + return ret; + ret = (long)pfn; + if (ret < 0) + return ret; + + ret = copy_to_user(user_arg, &pfn, sizeof(pfn)) ? 
-EFAULT : 0; + + return ret; +} #endif static void ack_kick(void *cancel_cpu_run) @@ -2084,6 +2452,9 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case MSHV_VTL_GUEST_VSM_VMSA_PFN: ret = mshv_vtl_ioctl_guest_vsm_vmsa_pfn((void __user *)arg); break; + case MSHV_VTL_SECURE_AVIC_VTL0_PFN: + ret = mshv_vtl_ioctl_secure_avic_vtl0_pfn((void __user *)arg); + break; #endif case MSHV_VTL_KICK_CPU: @@ -2100,7 +2471,7 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) { - struct page *page; + struct page *page = NULL; int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK; int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT; @@ -2124,7 +2495,7 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; page_ptr_ptr = per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu); if (!*page_ptr_ptr) { - if (mshv_configure_vmsa_page(1, page_ptr_ptr) < 0) + if (mshv_snp_configure_vmsa_page(1, page_ptr_ptr) < 0) return VM_FAULT_SIGBUS; } page = *page_ptr_ptr; @@ -2132,18 +2503,16 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) if (!hv_isolation_type_snp()) return VM_FAULT_SIGBUS; page = *per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page, cpu); -#ifdef CONFIG_INTEL_TDX_GUEST } else if (real_off == MSHV_APIC_PAGE_OFFSET) { - if (!hv_isolation_type_tdx()) - return VM_FAULT_SIGBUS; - - page = tdx_apic_page(cpu); -#endif + page = mshv_apic_page(cpu); #endif } else { return VM_FAULT_NOPAGE; } + if (!page) + return VM_FAULT_SIGBUS; + get_page(page); vmf->page = page; @@ -2774,7 +3143,7 @@ static int __init mshv_vtl_init(void) goto unset_func; } - ret = mshv_tdx_create_apicid_to_cpuid_mapping(dev); + ret = mshv_create_apicid_to_cpuid_mapping(dev); if (ret) goto unset_func; @@ -2843,7 +3212,7 @@ static int __init mshv_vtl_init(void) static void __exit mshv_vtl_exit(void) { mshv_setup_vtl_func(NULL, NULL, NULL); - mshv_tdx_free_apicid_to_cpuid_mapping(); + mshv_free_apicid_to_cpuid_mapping(); misc_deregister(&mshv_vtl_sint_dev); misc_deregister(&mshv_vtl_hvcall); misc_deregister(&mshv_vtl_low); diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 02f0a4ab723e..ddeef1ebbad8 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -749,6 +749,23 @@ struct hv_get_vp_registers_output { }; }; +union hv_x64_register_sev_gpa_page { + u64 u64; + struct { + u64 enabled:1; + u64 reserved:11; + u64 pagenumber:52; + }; +} __packed; + +union hv_register_value { + u128 reg128; + u64 reg64; + u32 reg32; + u16 reg16; + u8 reg8; +}; + /* HvSetVpRegisters hypercall with variable size reg name/value list*/ struct hv_set_vp_registers_input { struct { @@ -761,8 +778,13 @@ struct hv_set_vp_registers_input { u32 name; u32 padding1; u64 padding2; - u64 valuelow; - u64 valuehigh; + union { + union hv_register_value value; + struct { + u64 valuelow; + u64 valuehigh; + }; + }; } element[]; } __packed; diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index caa4b4430634..801208678450 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -88,6 +88,14 @@ enum cc_attr { * enabled to run SEV-SNP guests. */ CC_ATTR_HOST_SEV_SNP, + + /** + * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active. + * + * The host kernel is running with the necessary features enabled + * to run SEV-SNP guests with full Secure AVIC capabilities. 
+ */ + CC_ATTR_SNP_SECURE_AVIC, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 7ba3a3f24989..ebe390277092 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -367,6 +367,7 @@ struct mshv_kick_cpus { #define MSHV_VTL_RMPQUERY _IOW(MSHV_IOCTL, 0x35, struct mshv_rmpquery) #define MSHV_VTL_INVLPGB _IOW(MSHV_IOCTL, 0x36, struct mshv_invlpgb) #define MSHV_VTL_TLBSYNC _IO(MSHV_IOCTL, 0x37) +#define MSHV_VTL_SECURE_AVIC_VTL0_PFN _IOWR(MSHV_IOCTL, 0x39, __u64) /* VMBus device IOCTLs */
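For reference, a hedged user-space sketch of how a VMM might use the new MSHV_VTL_SECURE_AVIC_VTL0_PFN ioctl to query the per-CPU Secure AVIC backing page PFN returned by mshv_vtl_ioctl_secure_avic_vtl0_pfn(). The /dev/mshv_vtl path and the error handling are assumptions; only the ioctl number and the u32-cpu-index-in / u64-pfn-out argument layout come from the patch above:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/mshv.h>

int main(void)
{
	uint64_t arg = 0;			/* low 32 bits: target CPU index (CPU 0 here) */
	int fd = open("/dev/mshv_vtl", O_RDWR);	/* assumed device node */

	if (fd < 0)
		return 1;

	/* The kernel reads a u32 cpu id from the buffer and writes back a u64 PFN. */
	if (ioctl(fd, MSHV_VTL_SECURE_AVIC_VTL0_PFN, &arg) == 0)
		printf("CPU0 Secure AVIC backing page PFN: 0x%llx\n",
		       (unsigned long long)arg);

	close(fd);
	return 0;
}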