|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * Copyright (C) 2025 Google |
| 4 | + */ |
| 5 | + |
| 6 | +#include <vmx/vmx.h> |
| 7 | +#include "host_vmx.h" |
| 8 | +#include "vcpu_regs.h" |
| 9 | + |
/*
 * Host register, MSR and descriptor-table state captured from the
 * guest-state area of the VMCS (see read_host_cpu_state()) and
 * restored before handing the CPU back to the host
 * (see restore_host_special_regs() / pkvm_vmx_reprivilege_cpu()).
 */
struct host_cpu_state {
	unsigned long cr0, cr3, cr4;	/* control registers */
	unsigned long rip, rsp;		/* resume point and host stack */
	unsigned long rflags;
	unsigned long fsbase, gsbase;	/* FS/GS base MSR values */
	unsigned long long debugctl, perf_global_ctrl;
	unsigned long long sysenter_cs, sysenter_esp, sysenter_eip;
	unsigned long long efer, cr_pat;
	unsigned short cs, ds, es, fs, gs, ss;	/* segment selectors */

	struct desc_ptr gdt, idt;	/* GDTR/IDTR base + limit */
};
| 22 | + |
| 23 | +static inline void read_host_cpu_state(struct host_cpu_state *hcs) |
| 24 | +{ |
| 25 | + hcs->rsp = vmcs_readl(GUEST_RSP); |
| 26 | + hcs->rip = vmcs_readl(GUEST_RIP) + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 27 | + hcs->rflags = vmcs_readl(GUEST_RFLAGS); |
| 28 | + |
| 29 | + hcs->ds = vmcs_read16(GUEST_DS_SELECTOR); |
| 30 | + hcs->es = vmcs_read16(GUEST_ES_SELECTOR); |
| 31 | + hcs->fs = vmcs_read16(GUEST_FS_SELECTOR); |
| 32 | + hcs->gs = vmcs_read16(GUEST_GS_SELECTOR); |
| 33 | + hcs->ss = vmcs_read16(GUEST_SS_SELECTOR); |
| 34 | + hcs->cs = vmcs_read16(GUEST_CS_SELECTOR); |
| 35 | + |
| 36 | + hcs->fsbase = vmcs_readl(GUEST_FS_BASE); |
| 37 | + hcs->gsbase = vmcs_readl(GUEST_GS_BASE); |
| 38 | + |
| 39 | + hcs->gdt.address = vmcs_readl(GUEST_GDTR_BASE); |
| 40 | + hcs->gdt.size = vmcs_read32(GUEST_GDTR_LIMIT); |
| 41 | + hcs->idt.address = vmcs_readl(GUEST_IDTR_BASE); |
| 42 | + hcs->idt.size = vmcs_read32(GUEST_IDTR_LIMIT); |
| 43 | + |
| 44 | + hcs->debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
| 45 | + hcs->perf_global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); |
| 46 | + hcs->sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
| 47 | + hcs->sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
| 48 | + hcs->sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); |
| 49 | + hcs->efer = vmcs_read64(GUEST_IA32_EFER); |
| 50 | + hcs->cr_pat = vmcs_read64(GUEST_IA32_PAT); |
| 51 | + |
| 52 | + hcs->cr0 = vmcs_readl(GUEST_CR0); |
| 53 | + hcs->cr3 = vmcs_readl(GUEST_CR3); |
| 54 | + hcs->cr4 = vmcs_readl(GUEST_CR4); |
| 55 | +} |
| 56 | + |
| 57 | +#define PKVM_WRITE_CR(crnum, val) \ |
| 58 | +static inline void __pkvm_write_cr##crnum(unsigned long val) \ |
| 59 | +{ \ |
| 60 | + asm volatile("mov %0,%%cr" #crnum : "+r" (val) : : "memory"); \ |
| 61 | +} |
| 62 | + |
| 63 | +PKVM_WRITE_CR(0, val) |
| 64 | +PKVM_WRITE_CR(3, val) |
| 65 | +PKVM_WRITE_CR(4, val) |
| 66 | + |
/*
 * Emit one asm statement that reloads a general-purpose register from
 * the vCPU register array whose base address is in %rdi.
 *
 * @offset:   byte offset of the saved register within the array
 *            (the VCPU_* constants from vcpu_regs.h)
 * @dest_reg: destination register name, written without a %% prefix
 *
 * Expansion note: because @offset is first substituted as an argument
 * of RESTORE_VCPU_REG, it is macro-expanded before STRINGIFY applies
 * '#', so a VCPU_* constant is stringified as its numeric value rather
 * than its name.  This assumes the VCPU_* offsets are object-like
 * macros (e.g. generated offsets) — TODO confirm against vcpu_regs.h.
 */
#define STRINGIFY(x) #x
#define RESTORE_VCPU_REG(offset, dest_reg) \
	"mov " STRINGIFY(offset) "(%%rdi), %%" #dest_reg "\n"
| 75 | + |
| 76 | + |
| 77 | +static inline void restore_host_special_regs(struct host_cpu_state *hcs) |
| 78 | +{ |
| 79 | + struct desc_struct *gdt_desc; |
| 80 | + tss_desc *tss; |
| 81 | + |
| 82 | + /* Reset the busy bit to reload TR */ |
| 83 | + gdt_desc = (struct desc_struct *)(hcs->gdt.address); |
| 84 | + tss = (tss_desc *)&gdt_desc[GDT_ENTRY_TSS]; |
| 85 | + tss->type = DESC_TSS; |
| 86 | + |
| 87 | + __pkvm_write_cr4(hcs->cr4); |
| 88 | + __pkvm_write_cr0(hcs->cr0); |
| 89 | + __pkvm_write_cr3(hcs->cr3); |
| 90 | + |
| 91 | + wrmsrq_safe(MSR_CORE_PERF_GLOBAL_CTRL, hcs->perf_global_ctrl); |
| 92 | + wrmsrq(MSR_IA32_DEBUGCTLMSR, hcs->debugctl); |
| 93 | + wrmsrq(MSR_IA32_SYSENTER_CS, hcs->sysenter_cs); |
| 94 | + wrmsrq(MSR_IA32_SYSENTER_ESP, hcs->sysenter_esp); |
| 95 | + wrmsrq(MSR_IA32_SYSENTER_EIP, hcs->sysenter_eip); |
| 96 | + wrmsrq(MSR_IA32_CR_PAT, hcs->cr_pat); |
| 97 | + wrmsrq(MSR_EFER, hcs->efer); |
| 98 | + |
| 99 | + asm volatile ( |
| 100 | + "lgdt %0\n" |
| 101 | + "lidt %1\n" |
| 102 | + "ltr %w2\n" |
| 103 | + "mov %3, %%ds\n" |
| 104 | + "mov %4, %%es\n" |
| 105 | + "mov %5, %%fs\n" |
| 106 | + "mov %6, %%gs\n" |
| 107 | + |
| 108 | + : |
| 109 | + : "m"(hcs->gdt), "m"(hcs->idt), "q"(GDT_ENTRY_TSS*8), |
| 110 | + "m"(hcs->ds), "m"(hcs->es), "m"(hcs->fs), "m"(hcs->gs) |
| 111 | + : "memory" |
| 112 | + ); |
| 113 | + |
| 114 | + wrmsrl(MSR_FS_BASE, hcs->fsbase); |
| 115 | + wrmsrl(MSR_GS_BASE, hcs->gsbase); |
| 116 | +} |
| 117 | + |
/*
 * Restores host cpu state and returns to host in VMX root mode.
 *
 * @vcpu_regs: array of saved host general-purpose registers, indexed
 *             by the VCPU_* byte offsets from vcpu_regs.h.
 *
 * This function never returns to its C caller: the final iretq jumps
 * directly to the host at hcs.rip (the instruction after the
 * hypercall, see read_host_cpu_state()).
 */
void pkvm_vmx_reprivilege_cpu(unsigned long *vcpu_regs)
{
	/*
	 * NOTE(review): one static buffer is shared by every CPU that
	 * runs this path — confirm callers serialize reprivileging, or
	 * that only a single CPU can reach this at a time.
	 */
	static struct host_cpu_state hcs;

	read_host_cpu_state(&hcs);
	restore_host_special_regs(&hcs);

	asm volatile(
		/* Build the iretq frame: SS, RSP, RFLAGS, CS, RIP. */
		"pushq %0\n"
		"pushq %1\n"
		"pushq %2\n"
		"pushq %3\n"
		"pushq %4\n"

		/*
		 * Restore general purpose registers.  None of these are
		 * in the clobber list: the asm never falls through to
		 * compiler-generated code, so nothing after it can
		 * depend on register contents.
		 */
		RESTORE_VCPU_REG(VCPU_RCX, rcx)
		RESTORE_VCPU_REG(VCPU_RDX, rdx)
		RESTORE_VCPU_REG(VCPU_RBX, rbx)
		RESTORE_VCPU_REG(VCPU_RBP, rbp)
		RESTORE_VCPU_REG(VCPU_RSI, rsi)
		RESTORE_VCPU_REG(VCPU_R8, r8)
		RESTORE_VCPU_REG(VCPU_R9, r9)
		RESTORE_VCPU_REG(VCPU_R10, r10)
		RESTORE_VCPU_REG(VCPU_R11, r11)
		RESTORE_VCPU_REG(VCPU_R12, r12)
		RESTORE_VCPU_REG(VCPU_R13, r13)
		RESTORE_VCPU_REG(VCPU_R14, r14)
		RESTORE_VCPU_REG(VCPU_R15, r15)

		/*
		 * Restore RDI last: it holds the base pointer the
		 * RESTORE_VCPU_REG() loads above read through.
		 */
		RESTORE_VCPU_REG(VCPU_RDI, rdi)

		/*
		 * We are not technically returning from the hypercall, but set
		 * RAX to zero to indicate to host that reprivilege succeeded.
		 */
		"xor %%rax, %%rax\n"

		"iretq\n"

		:
		: "m"(hcs.ss), "m"(hcs.rsp), "m"(hcs.rflags),
		  "m"(hcs.cs), "m"(hcs.rip), "D"(vcpu_regs)
		: "memory", "cc"
	);
}
/* Inline asm never returns / has no standard frame; silence objtool. */
STACK_FRAME_NON_STANDARD(pkvm_vmx_reprivilege_cpu);
0 commit comments