diff --git a/src/hyperlight_guest_bin/build.rs b/src/hyperlight_guest_bin/build.rs
index 35a39b469..dd8d76079 100644
--- a/src/hyperlight_guest_bin/build.rs
+++ b/src/hyperlight_guest_bin/build.rs
@@ -88,6 +88,10 @@ fn cargo_main() {
     // targets will eventually show up.
     cfg.flag("--target=x86_64-unknown-linux-none");
 
+    // We don't use a different stack for all interrupts, so there
+    // can be no red zone
+    cfg.flag("-mno-red-zone");
+
     // We don't support stack protectors at the moment, but Arch Linux clang
     // auto-enables them for -linux platforms, so explicitly disable them.
     cfg.flag("-fno-stack-protector");
@@ -245,6 +249,7 @@ fn main() -> std::process::ExitCode {
             "-fno-stack-protector",
             "-fstack-clash-protection",
             "-mstack-probe-size=4096",
+            "-mno-red-zone",
         ])
         .arg("-nostdinc")
        .arg("-isystem")
diff --git a/src/hyperlight_guest_bin/src/exceptions/handler.rs b/src/hyperlight_guest_bin/src/exceptions/handler.rs
index 5bc1a7e09..e2072bf1f 100644
--- a/src/hyperlight_guest_bin/src/exceptions/handler.rs
+++ b/src/hyperlight_guest_bin/src/exceptions/handler.rs
@@ -21,6 +21,45 @@ use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
 use hyperlight_common::outb::Exception;
 use hyperlight_guest::exit::abort_with_code_and_message;
 
+use crate::paging;
+
+/// See AMD64 Architecture Programmer's Manual, Volume 2
+/// §8.9.3 Interrupt Stack Frame, pp. 283--284
+/// Figure 8-14: Long-Mode Stack After Interrupt---Same Privilege,
+/// Figure 8-15: Long-Mode Stack After Interrupt---Higher Privilege
+/// Subject to the proviso that we push a dummy error code of 0 for exceptions
+/// for which the processor does not provide one
+#[repr(C)]
+pub struct ExceptionInfo {
+    pub error_code: u64,
+    pub rip: u64,
+    pub cs: u64,
+    pub rflags: u64,
+    pub rsp: u64,
+    pub ss: u64,
+}
+const _: () = assert!(core::mem::offset_of!(ExceptionInfo, rip) == 8);
+const _: () = assert!(core::mem::offset_of!(ExceptionInfo, rsp) == 32);
+
+#[repr(C)]
+/// Saved context, pushed onto the stack by exception entry code
+pub struct Context {
+    /// in order: gs, fs, es
+    pub segments: [u64; 3],
+    pub fxsave: [u8; 512],
+    pub ds: u64,
+    /// no `rsp`, since the processor saved it
+    /// `rax` is at the top, `r15` the bottom
+    pub gprs: [u64; 15],
+}
+const _: () = assert!(size_of::<Context>() == 152 + 512);
+
+// TODO: This will eventually need to end up in a per-thread context,
+// when there are threads.
+pub static handlers: [core::sync::atomic::AtomicU64; 31] =
+    [const { core::sync::atomic::AtomicU64::new(0) }; 31];
+type handler_t = fn(n: u64, info: *mut ExceptionInfo, ctx: *mut Context, pf_addr: u64) -> bool;
+
 /// Exception handler
 #[unsafe(no_mangle)]
 pub extern "C" fn hl_exception_handler(
@@ -28,13 +67,43 @@ pub extern "C" fn hl_exception_handler(
     exception_number: u64,
     page_fault_address: u64,
 ) {
+    let ctx = stack_pointer as *mut Context;
+    let exn_info = (stack_pointer + size_of::<Context>() as u64) as *mut ExceptionInfo;
+
     let exception = Exception::try_from(exception_number as u8).expect("Invalid exception number");
+
+    let saved_rip = unsafe { (&raw const (*exn_info).rip).read_volatile() };
+    let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() };
+
     let msg = format!(
-        "Page Fault Address: {:#x}\n\
-        Stack Pointer: {:#x}",
-        page_fault_address, stack_pointer
+        "Exception vector: {:#}\n\
+        Faulting Instruction: {:#x}\n\
+        Page Fault Address: {:#x}\n\
+        Error code: {:#x}\n\
+        Stack Pointer: {:#x}",
+        exception_number, saved_rip, page_fault_address, error_code, stack_pointer
     );
+    // We don't presently have any need for user-defined interrupts,
+    // so we only support handlers for the architecture-defined
+    // vectors (0-31)
+    if exception_number < 31 {
+        let handler =
+            handlers[exception_number as usize].load(core::sync::atomic::Ordering::Acquire);
+        if handler != 0
+            && unsafe {
+                core::mem::transmute::<_, handler_t>(handler)(
+                    exception_number,
+                    exn_info,
+                    ctx,
+                    page_fault_address,
+                )
+            }
+        {
+            return;
+        }
+    }
+
     unsafe {
         abort_with_code_and_message(
             &[ErrorCode::GuestError as u8, exception as u8],
diff --git a/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs b/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs
index 0a0d63775..bbfdd96fa 100644
--- a/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs
+++ b/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs
@@ -67,9 +67,19 @@ macro_rules! context_save {
             " push r13\n",
             " push r14\n",
             " push r15\n",
-            // Save segment registers
+            // Save one of the segment registers to get 16-byte alignment for
+            // FXSAVE. TODO: consider packing the segment registers
             " mov rax, ds\n",
             " push rax\n",
+            // Save floating-point/SSE registers
+            // TODO: Don't do this unconditionally: get the exn
+            // handlers compiled without sse
+            // TODO: Check if we ever generate code with ymm/zmm in
+            // the handlers and save/restore those as well
+            " sub rsp, 512\n",
+            " mov rax, rsp\n",
+            " fxsave [rax]\n",
+            // Save the rest of the segment registers
             " mov rax, es\n",
             " push rax\n",
             " mov rax, fs\n",
@@ -83,13 +93,18 @@ macro_rules! context_restore {
     () => {
         concat!(
-            // Restore segment registers
+            // Restore most segment registers
             " pop rax\n",
             " mov gs, rax\n",
             " pop rax\n",
             " mov fs, rax\n",
             " pop rax\n",
             " mov es, rax\n",
+            // Restore floating-point/SSE registers
+            " mov rax, rsp\n",
+            " fxrstor [rax]\n",
+            " add rsp, 512\n",
+            // Restore the last segment register
             " pop rax\n",
             " mov ds, rax\n",
             // Restore general-purpose registers
@@ -123,7 +138,8 @@ macro_rules! generate_exceptions {
             " mov rdi, rsp\n",
             " call {hl_exception_handler}\n",
             context_restore!(),
-            " iretq\n", // iretq is used to return from exception in x86_64
+            " add rsp, 8\n", // error code
+            " iretq\n", // iretq is used to return from exception in x86_64
         generate_excp!(0, pusherrcode),
         generate_excp!(1, pusherrcode),
         generate_excp!(2, pusherrcode),
diff --git a/src/hyperlight_guest_bin/src/guest_function/call.rs b/src/hyperlight_guest_bin/src/guest_function/call.rs
index bdaed4212..d829e2a85 100644
--- a/src/hyperlight_guest_bin/src/guest_function/call.rs
+++ b/src/hyperlight_guest_bin/src/guest_function/call.rs
@@ -100,6 +100,17 @@ fn internal_dispatch_function() -> Result<()> {
 // which if it were included in the internal_dispatch_function cause the epilogue to not be called because the halt() would not return
 // when running in the hypervisor.
 pub(crate) extern "C" fn dispatch_function() {
+    // The hyperlight host likes to use one partition and reset it in
+    // various ways; if that has happened, there might be stale TLB
+    // entries hanging around from the former user of the
+    // partition. Flushing the TLB here is not quite the right thing
+    // to do, since incorrectly cached entries could make even this
+    // code not exist, but regrettably there is not a simple way for
+    // the host to trigger flushing when it ought to happen, so for
+    // now this works in practice, since the text segment is always
+    // part of the big identity-mapped region at the base of the
+    // guest.
+    crate::paging::flush_tlb();
     let _ = internal_dispatch_function();
     halt();
 }
diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs
index 473bfbfc1..f48a196d6 100644
--- a/src/hyperlight_guest_bin/src/lib.rs
+++ b/src/hyperlight_guest_bin/src/lib.rs
@@ -35,9 +35,9 @@ use spin::Once;
 
 // === Modules ===
 #[cfg(target_arch = "x86_64")]
-mod exceptions {
+pub mod exceptions {
     pub(super) mod gdt;
-    mod handler;
+    pub mod handler;
     mod idt;
     pub(super) mod idtr;
     mod interrupt_entry;
@@ -52,6 +52,7 @@ pub mod guest_function {
 pub mod guest_logger;
 pub mod host_comm;
 pub mod memory;
+pub mod paging;
 
 // === Globals ===
 #[global_allocator]
diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs
new file mode 100644
index 000000000..3d824e680
--- /dev/null
+++ b/src/hyperlight_guest_bin/src/paging.rs
@@ -0,0 +1,252 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use alloc::alloc::Layout;
+use core::arch::asm;
+
+use crate::OS_PAGE_SIZE;
+
+/// Convert a physical address in main memory to a virtual address
+/// through the physmap
+///
+/// This is _not guaranteed_ to work with device memory
+pub fn ptov(x: u64) -> *mut u8 {
+    // Currently, all of main memory is identity mapped
+    x as *mut u8
+}
+
+// TODO: This is not at all thread-safe atm
+// TODO: A lot of code in this file uses inline assembly to load and
+// store page table entries. It would be nice to use pointer
+// volatile read/writes instead, but unfortunately we have a PTE
+// at physical address 0, which is currently identity-mapped at
+// virtual address 0, and Rust raw pointer operations can't be
+// used to read/write from address 0.
+
+/// A helper structure indicating a mapping operation that needs to be
+/// performed
+struct MapRequest {
+    table_base: u64,
+    vmin: *mut u8,
+    len: u64,
+}
+
+/// A helper structure indicating that a particular PTE needs to be
+/// modified
+struct MapResponse {
+    entry_ptr: *mut u64,
+    vmin: *mut u8,
+    len: u64,
+}
+
+/// Assumption: all are page-aligned
+pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64) {
+    let mut pml4_base: u64 = 0;
+    unsafe {
+        asm!("mov {}, cr3", out(reg) pml4_base);
+    }
+    pml4_base &= !0xfff;
+    modify_ptes::<47, 39>(MapRequest {
+        table_base: pml4_base,
+        vmin: virt_base,
+        len,
+    })
+    .map(|r| unsafe { alloc_pte_if_needed(r) })
+    .flat_map(modify_ptes::<38, 30>)
+    .map(|r| unsafe { alloc_pte_if_needed(r) })
+    .flat_map(modify_ptes::<29, 21>)
+    .map(|r| unsafe { alloc_pte_if_needed(r) })
+    .flat_map(modify_ptes::<20, 12>)
+    .map(|r| map_normal(phys_base, virt_base, r))
+    .collect::<()>();
+}
+
+#[allow(unused)]
+/// This function is not presently used for anything, but is useful
+/// for debugging
+pub unsafe fn dbg_print_address_pte(address: u64) -> u64 {
+    let mut pml4_base: u64 = 0;
+    unsafe {
+        asm!("mov {}, cr3", out(reg) pml4_base);
+    }
+    pml4_base &= !0xfff;
+    let addrs = modify_ptes::<47, 39>(MapRequest {
+        table_base: pml4_base,
+        vmin: address as *mut u8,
+        len: unsafe { OS_PAGE_SIZE as u64 },
+    })
+    .map(|r| unsafe { require_pte_exist(r) })
+    .flat_map(modify_ptes::<38, 30>)
+    .map(|r| unsafe { require_pte_exist(r) })
+    .flat_map(modify_ptes::<29, 21>)
+    .map(|r| unsafe { require_pte_exist(r) })
+    .flat_map(modify_ptes::<20, 12>)
+    .map(|r| {
+        let mut pte: u64 = 0;
+        unsafe {
+            asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) r.entry_ptr);
+        }
+        pte
+    })
+    .collect::<alloc::vec::Vec<u64>>();
+    if addrs.len() != 1 {
+        panic!("impossible: 1 page map request resolved to multiple PTEs");
+    }
+    return addrs[0];
+}
+
+/// Allocate n contiguous physical pages and return the physical
+/// addresses of the pages in question.
+pub unsafe fn alloc_phys_pages(n: u64) -> u64 {
+    // Currently, since all of main memory is idmap'd, we can just
+    // allocate any appropriately aligned section of memory.
+    unsafe {
+        let v = alloc::alloc::alloc_zeroed(
+            Layout::from_size_align(n as usize * OS_PAGE_SIZE as usize, OS_PAGE_SIZE as usize)
+                .expect("could not create physical page allocation layout"),
+        );
+        if v.is_null() {
+            panic!("could not allocate a physical page");
+        }
+        v as u64
+    }
+}
+
+pub unsafe fn require_pte_exist(x: MapResponse) -> MapRequest {
+    let mut pte: u64 = 0;
+    unsafe {
+        asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) x.entry_ptr);
+    }
+    let present = pte & 0x1;
+    if present == 0 {
+        panic!("debugging: found not-present pte");
+    }
+    MapRequest {
+        table_base: pte & !0xfff,
+        vmin: x.vmin,
+        len: x.len,
+    }
+}
+
+/// Page-mapping callback to allocate a next-level page table if necessary
+pub unsafe fn alloc_pte_if_needed(x: MapResponse) -> MapRequest {
+    let mut pte: u64 = 0;
+    unsafe {
+        asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) x.entry_ptr);
+    }
+    let present = pte & 0x1;
+    if present != 0 {
+        return MapRequest {
+            table_base: pte & !0xfff,
+            vmin: x.vmin,
+            len: x.len,
+        };
+    }
+    let page_addr = unsafe { alloc_phys_pages(1) };
+    unsafe { ptov(page_addr).write_bytes(0u8, OS_PAGE_SIZE as usize) };
+    let pte = page_addr |
+        1 << 5 | // A   - we don't track accesses at table level
+        0 << 4 | // PCD - leave caching enabled
+        0 << 3 | // PWT - write-back
+        1 << 2 | // U/S - allow user access to everything (for now)
+        1 << 1 | // R/W - we don't use block-level permissions
+        1 << 0; // P   - this entry is present
+    unsafe {
+        asm!("mov qword ptr [{}], {}", in(reg) x.entry_ptr, in(reg) pte);
+    }
+    MapRequest {
+        table_base: page_addr,
+        vmin: x.vmin,
+        len: x.len,
+    }
+}
+
+/// Map a normal memory page
+///
+/// TODO: support permissions; currently mapping is always RWX
+fn map_normal(phys_base: u64, virt_base: *mut u8, r: MapResponse) {
+    let pte = (phys_base + (r.vmin as u64 - virt_base as u64)) |
+        1 << 6 | // D   - we don't presently track dirty state for anything
+        1 << 5 | // A   - we don't presently track access for anything
+        0 << 4 | // PCD - leave caching enabled
+        0 << 3 | // PWT - write-back
+        1 << 2 | // U/S - allow user access to everything (for now)
+        1 << 1 | // R/W - for now make everything r/w
+        1 << 0; // P   - this entry is present
+    unsafe {
+        r.entry_ptr.write_volatile(pte);
+    }
+}
+
+#[inline(always)]
+/// Utility function to extract an (inclusive on both ends) bit range
+/// from a quadword.
+fn bits<const high_bit: u8, const low_bit: u8>(x: u64) -> u64 {
+    (x & ((1 << (high_bit + 1)) - 1)) >> low_bit
+}
+
+struct ModifyPteIterator<const high_bit: u8, const low_bit: u8> {
+    request: MapRequest,
+    n: u64,
+}
+impl<const high_bit: u8, const low_bit: u8> Iterator for ModifyPteIterator<high_bit, low_bit> {
+    type Item = MapResponse;
+    fn next(&mut self) -> Option<Self::Item> {
+        if (self.n << low_bit) >= self.request.len {
+            return None;
+        }
+        // next stage parameters
+        let next_vmin = self.request.vmin.wrapping_add((self.n << low_bit) as usize);
+        let entry_ptr = ptov(self.request.table_base)
+            .wrapping_add((bits::<high_bit, low_bit>(next_vmin as u64) << 3) as usize)
+            as *mut u64;
+        let len_from_here = self.request.len - (self.n << low_bit);
+        let next_len = core::cmp::min(len_from_here, 1 << low_bit);
+
+        // update our state
+        self.n += 1;
+
+        Some(MapResponse {
+            entry_ptr,
+            vmin: next_vmin,
+            len: next_len,
+        })
+    }
+}
+fn modify_ptes<const high_bit: u8, const low_bit: u8>(
+    r: MapRequest,
+) -> ModifyPteIterator<high_bit, low_bit> {
+    ModifyPteIterator { request: r, n: 0 }
+}
+
+pub fn flush_tlb() {
+    // Currently this just always flips CR4.PGE back and forth to
+    // trigger a tlb flush. We should use a faster approach where
+    // available
+    let mut orig_cr4: u64 = 0;
+    unsafe {
+        asm!("mov {}, cr4", out(reg) orig_cr4);
+    }
+    let tmp_cr4: u64 = orig_cr4 ^ (1 << 7); // CR4.PGE
+    unsafe {
+        asm!(
+            "mov cr4, {}",
+            "mov cr4, {}",
+            in(reg) tmp_cr4,
+            in(reg) orig_cr4
+        );
+    }
+}
diff --git a/src/hyperlight_host/src/func/call_ctx.rs b/src/hyperlight_host/src/func/call_ctx.rs
index 180731910..168437b97 100644
--- a/src/hyperlight_host/src/func/call_ctx.rs
+++ b/src/hyperlight_host/src/func/call_ctx.rs
@@ -17,6 +17,7 @@ limitations under the License.
 use tracing::{Span, instrument};
 
 use super::{ParameterTuple, SupportedReturnType};
+use crate::mem::memory_region::MemoryRegion;
 use crate::sandbox::Callable;
 use crate::{MultiUseSandbox, Result};
 
 /// A context for calling guest functions.
@@ -70,6 +71,30 @@ impl MultiUseGuestCallContext {
     pub(crate) fn finish_no_reset(self) -> MultiUseSandbox {
         self.sbox
     }
+
+    /// Map a region of host memory into the sandbox.
+    ///
+    /// Depending on the host platform, there are likely alignment
+    /// requirements of at least one page for base and len.
+    ///
+    /// `rgn.region_type` is ignored, since guest PTEs are not created
+    /// for the new memory.
+    ///
+    /// # Safety
+    /// It is the caller's responsibility to ensure that the host side
+    /// of the region remains intact and is not written to until this
+    /// mapping is removed, either due to the destruction of the
+    /// sandbox or due to a state rollback
+    pub unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> {
+        unsafe { self.sbox.map_region(rgn) }
+    }
+
+    /// Map the contents of a file into the guest at a particular address
+    ///
+    /// Returns the length of the mapping
+    pub fn map_file_cow(&mut self, fp: &std::path::Path, guest_base: u64) -> Result<u64> {
+        self.sbox.map_file_cow(fp, guest_base)
+    }
 }
 
 impl Callable for MultiUseGuestCallContext {
diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs
index db5037106..90e91f496 100644
--- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs
@@ -297,6 +297,7 @@ pub(crate) fn is_hypervisor_present() -> bool {
 /// called the Microsoft Hypervisor (MSHV)
 pub(crate) struct HypervLinuxDriver {
     _mshv: Mshv,
+    page_size: usize,
     vm_fd: VmFd,
     vcpu_fd: VcpuFd,
     entrypoint: u64,
@@ -424,6 +425,7 @@ impl HypervLinuxDriver {
         #[allow(unused_mut)]
         let mut hv = Self {
             _mshv: mshv,
+            page_size: 0,
             vm_fd,
             vcpu_fd,
             mem_regions,
@@ -525,6 +527,8 @@ impl Hypervisor for HypervLinuxDriver {
         max_guest_log_level: Option<LevelFilter>,
         #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
     ) -> Result<()> {
+        self.page_size = page_size as usize;
+
         let max_guest_log_level: u64 = match max_guest_log_level {
             Some(level) => level as u64,
             None => self.get_max_log_level().into(),
@@ -556,6 +560,37 @@ impl Hypervisor for HypervLinuxDriver {
         Ok(())
     }
 
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> {
+        if [
+            rgn.guest_region.start,
+            rgn.guest_region.end,
+            rgn.host_region.start,
+            rgn.host_region.end,
+        ]
+        .iter()
+        .any(|x| x % self.page_size != 0)
+        {
+            log_then_return!("region is not page-aligned");
+        }
+        let mshv_region: mshv_user_mem_region = rgn.to_owned().into();
+        self.vm_fd.map_user_memory(mshv_region)?;
+        self.mem_regions.push(rgn.to_owned());
+        Ok(())
+    }
+
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> {
+        for rgn in self
+            .mem_regions
+            .split_off(self.mem_regions.len() - n as usize)
+        {
+            let mshv_region: mshv_user_mem_region = rgn.to_owned().into();
+            self.vm_fd.unmap_user_memory(mshv_region)?;
+        }
+        Ok(())
+    }
+
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     fn dispatch_call_from_host(
         &mut self,
@@ -775,6 +810,8 @@ impl Hypervisor for HypervLinuxDriver {
                 }
                 other => {
                     crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self);
+                    #[cfg(crashdump)]
+                    let _ = crashdump::generate_crashdump(self);
                     log_then_return!("unknown Hyper-V run message type {:?}", other);
                 }
             },
diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs
index 288b5bf5b..cd0398854 100644
--- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs
@@ -36,8 +36,8 @@ use {
         DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, HypervDebug, VcpuStopReason,
     },
     super::handlers::DbgMemAccessHandlerWrapper,
+    crate::HyperlightError,
     crate::hypervisor::handlers::DbgMemAccessHandlerCaller,
-    crate::{HyperlightError, log_then_return},
     std::sync::Mutex,
 };
 
@@ -59,7 +59,7 @@ use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
 use crate::mem::ptr::{GuestPtr, RawPtr};
 #[cfg(crashdump)]
 use crate::sandbox::uninitialized::SandboxRuntimeConfig;
-use crate::{Result, debug, new_error};
+use crate::{Result, debug, log_then_return, new_error};
 
 #[cfg(gdb)]
 mod debug {
@@ -606,6 +621,21 @@ impl Hypervisor for HypervWindowsDriver {
         Ok(())
     }
 
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    unsafe fn map_region(&mut self, _rgn: &MemoryRegion) -> Result<()> {
+        log_then_return!("Mapping host memory into the guest not yet supported on this platform");
+    }
+
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> {
+        if n > 0 {
+            log_then_return!(
+                "Mapping host memory into the guest not yet supported on this platform"
+            );
+        }
+        Ok(())
+    }
+
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     fn dispatch_call_from_host(
         &mut self,
diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs
index d85a6a838..3da9786cd 100644
--- a/src/hyperlight_host/src/hypervisor/kvm.rs
+++ b/src/hyperlight_host/src/hypervisor/kvm.rs
@@ -493,6 +493,21 @@ impl Hypervisor for KVMDriver {
         Ok(())
     }
 
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    unsafe fn map_region(&mut self, _rgn: &MemoryRegion) -> Result<()> {
+        log_then_return!("Mapping host memory into the guest not yet supported on this platform");
+    }
+
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> {
+        if n > 0 {
+            log_then_return!(
+                "Mapping host memory into the guest not yet supported on this platform"
+            );
+        }
+        Ok(())
+    }
+
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     fn dispatch_call_from_host(
         &mut self,
diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs
index 0a31ee468..ecf6acbc5 100644
--- a/src/hyperlight_host/src/hypervisor/mod.rs
+++ b/src/hyperlight_host/src/hypervisor/mod.rs
@@ -132,6 +132,15 @@ pub(crate) trait Hypervisor: Debug + Sync + Send {
         #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper,
     ) -> Result<()>;
 
+    /// Map a region of host memory into the sandbox.
+    ///
+    /// Depending on the host platform, there are likely alignment
+    /// requirements of at least one page for base and len.
+    unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()>;
+
+    /// Unmap the most recent `n` regions mapped by `map_region`
+    unsafe fn unmap_regions(&mut self, n: u64) -> Result<()>;
+
     /// Dispatch a call from the host to the guest using the given pointer
     /// to the dispatch function _in the guest's address space_.
     ///
diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs
index a7e22255b..c24ac197c 100644
--- a/src/hyperlight_host/src/mem/memory_region.rs
+++ b/src/hyperlight_host/src/mem/memory_region.rs
@@ -182,13 +182,13 @@ pub enum MemoryRegionType {
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct MemoryRegion {
     /// the range of guest memory addresses
-    pub(crate) guest_region: Range<usize>,
+    pub guest_region: Range<usize>,
     /// the range of host memory addresses
-    pub(crate) host_region: Range<usize>,
+    pub host_region: Range<usize>,
     /// memory access flags for the given region
-    pub(crate) flags: MemoryRegionFlags,
+    pub flags: MemoryRegionFlags,
     /// the type of memory region
-    pub(crate) region_type: MemoryRegionType,
+    pub region_type: MemoryRegionType,
 }
 
 pub(crate) struct MemoryRegionVecBuilder {
diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs
index 7910d9dc2..90cb76573 100644
--- a/src/hyperlight_host/src/mem/mgr.rs
+++ b/src/hyperlight_host/src/mem/mgr.rs
@@ -73,6 +73,8 @@ pub(crate) struct SandboxMemoryManager<S> {
     pub(crate) load_addr: RawPtr,
     /// Offset for the execution entrypoint from `load_addr`
     pub(crate) entrypoint_offset: Offset,
+    /// How many memory regions were mapped after sandbox creation
+    pub(crate) mapped_rgns: u64,
     /// A vector of memory snapshots that can be used to save and restore the state of the memory
     /// This is used by the Rust Sandbox implementation (rather than the mem_snapshot field above which only exists to support current C API)
     snapshots: Arc<Mutex<Vec<SharedMemorySnapshot>>>,
@@ -95,6 +97,7 @@ where
             shared_mem,
             load_addr,
             entrypoint_offset,
+            mapped_rgns: 0,
             snapshots: Arc::new(Mutex::new(Vec::new())),
         }
     }
@@ -265,7 +268,7 @@ where
     /// this function will create a memory snapshot and push it onto the stack of snapshots
     /// It should be used when you want to save the state of the memory, for example, when evolving a sandbox to a new state
     pub(crate) fn push_state(&mut self) -> Result<()> {
-        let snapshot = SharedMemorySnapshot::new(&mut self.shared_mem)?;
+        let snapshot = SharedMemorySnapshot::new(&mut self.shared_mem, self.mapped_rgns)?;
         self.snapshots
             .try_lock()
             .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
@@ -277,7 +280,11 @@ where
     /// off the stack
     /// It should be used when you want to restore the state of the memory to a previous state but still want to
     /// retain that state, for example after calling a function in the guest
-    pub(crate) fn restore_state_from_last_snapshot(&mut self) -> Result<()> {
+    ///
+    /// Returns the number of memory regions mapped into the sandbox
+    /// that need to be unmapped in order for the restore to be
+    /// completed.
+    pub(crate) fn restore_state_from_last_snapshot(&mut self) -> Result<u64> {
         let mut snapshots = self
             .snapshots
             .try_lock()
@@ -288,13 +295,15 @@ where
         }
         #[allow(clippy::unwrap_used)] // We know that last is not None because we checked it above
         let snapshot = last.unwrap();
-        snapshot.restore_from_snapshot(&mut self.shared_mem)
+        let old_rgns = self.mapped_rgns;
+        self.mapped_rgns = snapshot.restore_from_snapshot(&mut self.shared_mem)?;
+        Ok(old_rgns - self.mapped_rgns)
     }
 
     /// this function pops the last snapshot off the stack and restores the memory to the previous state
     /// It should be used when you want to restore the state of the memory to a previous state and do not need to retain that state
     /// for example when devolving a sandbox to a previous state.
-    pub(crate) fn pop_and_restore_state_from_snapshot(&mut self) -> Result<()> {
+    pub(crate) fn pop_and_restore_state_from_snapshot(&mut self) -> Result<u64> {
         let last = self
             .snapshots
             .try_lock()
@@ -430,6 +439,7 @@ impl SandboxMemoryManager<ExclusiveSharedMemory> {
                 layout: self.layout,
                 load_addr: self.load_addr.clone(),
                 entrypoint_offset: self.entrypoint_offset,
+                mapped_rgns: 0,
                 snapshots: Arc::new(Mutex::new(Vec::new())),
             },
             SandboxMemoryManager {
@@ -437,6 +447,7 @@ impl SandboxMemoryManager<ExclusiveSharedMemory> {
                 layout: self.layout,
                 load_addr: self.load_addr.clone(),
                 entrypoint_offset: self.entrypoint_offset,
+                mapped_rgns: 0,
                 snapshots: Arc::new(Mutex::new(Vec::new())),
             },
         )
diff --git a/src/hyperlight_host/src/mem/shared_mem_snapshot.rs b/src/hyperlight_host/src/mem/shared_mem_snapshot.rs
index d5cf565de..ac2bdc6b5 100644
--- a/src/hyperlight_host/src/mem/shared_mem_snapshot.rs
+++ b/src/hyperlight_host/src/mem/shared_mem_snapshot.rs
@@ -24,16 +24,21 @@ use crate::Result;
 #[derive(Clone)]
 pub(super) struct SharedMemorySnapshot {
     snapshot: Vec<u8>,
+    /// How many non-main-RAM regions were mapped when this snapshot was taken?
+    mapped_rgns: u64,
 }
 
 impl SharedMemorySnapshot {
     /// Take a snapshot of the memory in `shared_mem`, then create a new
     /// instance of `Self` with the snapshot stored therein.
     #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
-    pub(super) fn new<S: SharedMemory>(shared_mem: &mut S) -> Result<Self> {
+    pub(super) fn new<S: SharedMemory>(shared_mem: &mut S, mapped_rgns: u64) -> Result<Self> {
         // TODO: Track dirty pages instead of copying entire memory
         let snapshot = shared_mem.with_exclusivity(|e| e.copy_all_to_vec())??;
-        Ok(Self { snapshot })
+        Ok(Self {
+            snapshot,
+            mapped_rgns,
+        })
     }
 
     /// Take another snapshot of the internally-stored `SharedMemory`,
@@ -51,8 +56,9 @@ impl SharedMemorySnapshot {
     pub(super) fn restore_from_snapshot<S: SharedMemory>(
         &mut self,
         shared_mem: &mut S,
-    ) -> Result<()> {
-        shared_mem.with_exclusivity(|e| e.copy_from_slice(self.snapshot.as_slice(), 0))?
+    ) -> Result<u64> {
+        shared_mem.with_exclusivity(|e| e.copy_from_slice(self.snapshot.as_slice(), 0))??;
+        Ok(self.mapped_rgns)
     }
 }
 
@@ -69,7 +75,7 @@ mod tests {
         let data2 = data1.iter().map(|b| b + 1).collect::<Vec<u8>>();
         let mut gm = ExclusiveSharedMemory::new(PAGE_SIZE_USIZE).unwrap();
         gm.copy_from_slice(data1.as_slice(), 0).unwrap();
-        let mut snap = super::SharedMemorySnapshot::new(&mut gm).unwrap();
+        let mut snap = super::SharedMemorySnapshot::new(&mut gm, 0).unwrap();
         {
             // after the first snapshot is taken, make sure gm has the equivalent
             // of data1
diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
index 8d1e27918..8df9d08ef 100644
--- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
+++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs
@@ -14,6 +14,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+#[cfg(unix)]
+use std::os::fd::AsRawFd;
+#[cfg(unix)]
+use std::os::linux::fs::MetadataExt;
+use std::path::Path;
 use std::sync::{Arc, Mutex};
 
 use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType};
@@ -31,12 +36,15 @@ use crate::func::{ParameterTuple, SupportedReturnType};
 use crate::hypervisor::handlers::DbgMemAccessHandlerWrapper;
 use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller};
 use crate::hypervisor::{Hypervisor, InterruptHandle};
+#[cfg(unix)]
+use crate::mem::memory_region::MemoryRegionType;
+use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
 use crate::mem::ptr::RawPtr;
 use crate::mem::shared_mem::HostSharedMemory;
 use crate::metrics::maybe_time_and_emit_guest_call;
 use crate::sandbox_state::sandbox::{DevolvableSandbox, EvolvableSandbox, Sandbox};
 use crate::sandbox_state::transition::{MultiUseContextCallback, Noop};
-use crate::{HyperlightError, Result};
+use crate::{HyperlightError, Result, log_then_return};
 
 /// A sandbox that supports being used Multiple times.
 /// The implication of being used multiple times is two-fold:
@@ -173,6 +181,75 @@ impl MultiUseSandbox {
         })
     }
 
+    /// Map a region of host memory into the sandbox.
+    ///
+    /// Depending on the host platform, there are likely alignment
+    /// requirements of at least one page for base and len.
+    ///
+    /// `rgn.region_type` is ignored, since guest PTEs are not created
+    /// for the new memory.
+    ///
+    /// It is the caller's responsibility to ensure that the host side
+    /// of the region remains intact and is not written to until this
+    /// mapping is removed, either due to the destruction of the
+    /// sandbox or due to a state rollback
+    #[instrument(err(Debug), skip(self, rgn), parent = Span::current())]
+    pub unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> {
+        if rgn.flags.contains(MemoryRegionFlags::STACK_GUARD) {
+            // Stack guard pages are an internal implementation detail
+            // (which really should be moved into the guest)
+            log_then_return!("Cannot map host memory as a stack guard page");
+        }
+        if rgn.flags.contains(MemoryRegionFlags::WRITE) {
+            // TODO: Implement support for writable mappings, which
+            // need to be registered with the memory manager so that
+            // writes can be rolled back when necessary.
+            log_then_return!("TODO: Writable mappings not yet supported");
+        }
+        unsafe { self.vm.map_region(rgn) }?;
+        self.mem_mgr.unwrap_mgr_mut().mapped_rgns += 1;
+        Ok(())
+    }
+
+    /// Map the contents of a file into the guest at a particular address
+    ///
+    /// Returns the length of the mapping
+    #[instrument(err(Debug), skip(self, _fp, _guest_base), parent = Span::current())]
+    pub(crate) fn map_file_cow(&mut self, _fp: &Path, _guest_base: u64) -> Result<u64> {
+        #[cfg(windows)]
+        log_then_return!("mmap'ing a file into the guest is not yet supported on Windows");
+        #[cfg(unix)]
+        unsafe {
+            let file = std::fs::File::options().read(true).write(true).open(_fp)?;
+            let file_size = file.metadata()?.st_size();
+            let page_size = page_size::get();
+            let size = (file_size as usize).div_ceil(page_size) * page_size;
+            let base = libc::mmap(
+                std::ptr::null_mut(),
+                size,
+                libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC,
+                libc::MAP_PRIVATE,
+                file.as_raw_fd(),
+                0,
+            );
+            if base == libc::MAP_FAILED {
+                log_then_return!("mmap error: {:?}", std::io::Error::last_os_error());
+            }
+
+            if let Err(err) = self.map_region(&MemoryRegion {
+                host_region: base as usize..base.wrapping_add(size) as usize,
+                guest_region: _guest_base as usize.._guest_base as usize + size,
+                flags: MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE,
+                region_type: MemoryRegionType::Heap,
+            }) {
+                libc::munmap(base, size);
+                return Err(err);
+            };
+
+            Ok(size as u64)
+        }
+    }
+
     /// This function is kept here for fuzz testing the parameter and return types
     #[cfg(feature = "fuzzing")]
     #[instrument(err(Debug), skip(self, args), parent = Span::current())]
@@ -193,7 +270,9 @@ impl MultiUseSandbox {
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     pub(crate) fn restore_state(&mut self) -> Result<()> {
         let mem_mgr = self.mem_mgr.unwrap_mgr_mut();
-        mem_mgr.restore_state_from_last_snapshot()
+        let rgns_to_unmap = mem_mgr.restore_state_from_last_snapshot()?;
+        unsafe { self.vm.unmap_regions(rgns_to_unmap)? };
+        Ok(())
     }
 
     pub(crate) fn call_guest_function_by_name_no_reset(
@@ -275,9 +354,11 @@ impl DevolvableSandbox<MultiUseSandbox, MultiUseSandbox, Noop<MultiUseSandbox, MultiUseSandbox>>
     ) -> Result<MultiUseSandbox> {
-        self.mem_mgr
+        let rgns_to_unmap = self
+            .mem_mgr
             .unwrap_mgr_mut()
             .pop_and_restore_state_from_snapshot()?;
+        unsafe { self.vm.unmap_regions(rgns_to_unmap)? };
         Ok(self)
     }
 }
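
For reviewers trying the new guest-side hook: the patch exposes `exceptions::handler::handlers` as a table of per-vector `AtomicU64` slots that `hl_exception_handler` consults before aborting. A minimal sketch of registering a handler from guest code follows; the chosen vector, function names, and import paths are illustrative assumptions, not part of this patch.

```rust
// Sketch only: assumes guest code linked against hyperlight_guest_bin with the
// modules made pub by this patch; the vector (14) and handler body are examples.
use core::sync::atomic::Ordering;

use hyperlight_guest_bin::exceptions::handler::{Context, ExceptionInfo, handlers};

/// A page-fault (vector 14) handler that inspects the fault but declines to
/// handle it, so the default abort path still runs.
fn my_page_fault_handler(
    _vector: u64,
    info: *mut ExceptionInfo,
    _ctx: *mut Context,
    pf_addr: u64,
) -> bool {
    // Read the saved RIP the same way hl_exception_handler does.
    let rip = unsafe { (&raw const (*info).rip).read_volatile() };
    let _ = (rip, pf_addr);
    false // false falls through to abort_with_code_and_message
}

pub fn install_handler() {
    // The table stores plain u64s; hl_exception_handler transmutes a non-zero
    // entry back to its handler fn type before calling it.
    handlers[14].store(my_page_fault_handler as usize as u64, Ordering::Release);
}
```

Returning `false` keeps the existing abort-with-error behaviour, so a handler can be purely observational.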
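On the host side, the new `MultiUseSandbox::map_region` API can be exercised roughly as below. This is a sketch under assumptions: the helper name, the buffer's provenance, and the exact import paths are illustrative; only `MemoryRegion`, `MemoryRegionFlags`, `MemoryRegionType`, and the `map_region` signature come from the patch.

```rust
// Sketch only: import paths assume these types are publicly reachable from
// hyperlight_host; adjust to wherever the crate actually re-exports them.
use hyperlight_host::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType};
use hyperlight_host::{MultiUseSandbox, Result};

/// Map a page-aligned, read-only host buffer into the guest at `guest_base`.
///
/// Safety: `buf` must stay alive and unmodified for as long as the mapping
/// exists (until the sandbox is dropped or its state is rolled back).
unsafe fn share_buffer(
    sbox: &mut MultiUseSandbox,
    buf: &'static [u8], // assumed page-aligned and a multiple of the page size
    guest_base: usize,
) -> Result<()> {
    let host_base = buf.as_ptr() as usize;
    let rgn = MemoryRegion {
        host_region: host_base..host_base + buf.len(),
        guest_region: guest_base..guest_base + buf.len(),
        // Writable mappings are rejected for now, so request read/execute only.
        flags: MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE,
        // region_type is ignored by map_region; Heap is just a placeholder.
        region_type: MemoryRegionType::Heap,
    };
    unsafe { sbox.map_region(&rgn) }
}
```

The region is removed again either when the sandbox is destroyed or when a state rollback unmaps everything mapped after the corresponding snapshot, matching the `mapped_rgns` bookkeeping added to the memory manager.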
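The const-generic bit ranges used by `map_region`'s walk in `paging.rs` (47..39, 38..30, 29..21, 20..12) are just the four 9-bit page-table indices of a canonical x86-64 virtual address. A standalone sketch of that arithmetic (a copy of the patch's `bits` helper, with upper-case parameter names for a host-side demo):

```rust
// Sketch only: mirrors paging.rs's bits() to show which table index each
// level of the walk extracts; the example address is arbitrary.
fn bits<const HIGH_BIT: u8, const LOW_BIT: u8>(x: u64) -> u64 {
    (x & ((1 << (HIGH_BIT + 1)) - 1)) >> LOW_BIT
}

fn main() {
    let vaddr: u64 = 0x0000_7fff_dead_b000;
    // Each level indexes a 512-entry table, so each index is 9 bits wide and
    // the PTE pointer offset is `index << 3` (8-byte entries), exactly as
    // ModifyPteIterator computes it.
    println!("PML4 index: {}", bits::<47, 39>(vaddr));
    println!("PDPT index: {}", bits::<38, 30>(vaddr));
    println!("PD   index: {}", bits::<29, 21>(vaddr));
    println!("PT   index: {}", bits::<20, 12>(vaddr));
}
```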