From 30b24de2d2fc2c5e7d60f06e42aa94af5fbbe0bd Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 8 Jul 2025 09:15:32 +0000 Subject: [PATCH 1/7] [hyperlight_host] mshv: dump core on unknown HV message Usually, an unknown HV message is the result of a double fault or similar: something going wrong in the guest. This commit ensures that a core file is generated when this happens and the crashdump feature is enabled. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> [hyperlight_host] Expose MemoryRegion structure It is a convenient tuple of information to use when mapping a new region into the sandbox. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_host/src/hypervisor/hyperv_linux.rs | 2 ++ src/hyperlight_host/src/mem/memory_region.rs | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index db5037106..48e589361 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -775,6 +775,8 @@ impl Hypervisor for HypervLinuxDriver { } other => { crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self); + #[cfg(crashdump)] + let _ = crashdump::generate_crashdump(self); log_then_return!("unknown Hyper-V run message type {:?}", other); } }, diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index a7e22255b..c24ac197c 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -182,13 +182,13 @@ pub enum MemoryRegionType { #[derive(Debug, Clone, PartialEq, Eq)] pub struct MemoryRegion { /// the range of guest memory addresses - pub(crate) guest_region: Range, + pub guest_region: Range, /// the range of host memory addresses - pub(crate) host_region: 
Range, + pub host_region: Range, /// memory access flags for the given region - pub(crate) flags: MemoryRegionFlags, + pub flags: MemoryRegionFlags, /// the type of memory region - pub(crate) region_type: MemoryRegionType, + pub region_type: MemoryRegionType, } pub(crate) struct MemoryRegionVecBuilder { From 0d65b8707c38e2873bde18f0f0d4a25b235417e3 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 8 Jul 2025 01:10:35 +0000 Subject: [PATCH 2/7] [hyperlight_host] Allow mapping a host memory region into a guest Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_host/src/func/call_ctx.rs | 25 ++++++ .../src/hypervisor/hyperv_linux.rs | 35 ++++++++ .../src/hypervisor/hyperv_windows.rs | 19 +++- src/hyperlight_host/src/hypervisor/kvm.rs | 15 ++++ src/hyperlight_host/src/hypervisor/mod.rs | 9 ++ src/hyperlight_host/src/mem/mgr.rs | 19 +++- .../src/mem/shared_mem_snapshot.rs | 16 ++-- .../src/sandbox/initialized_multi_use.rs | 87 ++++++++++++++++++- 8 files changed, 211 insertions(+), 14 deletions(-) diff --git a/src/hyperlight_host/src/func/call_ctx.rs b/src/hyperlight_host/src/func/call_ctx.rs index 180731910..168437b97 100644 --- a/src/hyperlight_host/src/func/call_ctx.rs +++ b/src/hyperlight_host/src/func/call_ctx.rs @@ -17,6 +17,7 @@ limitations under the License. use tracing::{Span, instrument}; use super::{ParameterTuple, SupportedReturnType}; +use crate::mem::memory_region::MemoryRegion; use crate::sandbox::Callable; use crate::{MultiUseSandbox, Result}; /// A context for calling guest functions. @@ -70,6 +71,30 @@ impl MultiUseGuestCallContext { pub(crate) fn finish_no_reset(self) -> MultiUseSandbox { self.sbox } + + /// Map a region of host memory into the sandbox. + /// + /// Depending on the host platform, there are likely alignment + /// requirements of at least one page for base and len. 
+ /// + /// `rgn.region_type` is ignored, since guest PTEs are not created + /// for the new memory. + /// + /// # Safety + /// It is the caller's responsibility to ensure that the host side + /// of the region remains intact and is not written to until this + /// mapping is removed, either due to the destruction of the + /// sandbox or due to a state rollback + pub unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + unsafe { self.sbox.map_region(rgn) } + } + + /// Map the contents of a file into the guest at a particular address + /// + /// Returns the length of the mapping + pub fn map_file_cow(&mut self, fp: &std::path::Path, guest_base: u64) -> Result { + self.sbox.map_file_cow(fp, guest_base) + } } impl Callable for MultiUseGuestCallContext { diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 48e589361..90e91f496 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -297,6 +297,7 @@ pub(crate) fn is_hypervisor_present() -> bool { /// called the Microsoft Hypervisor (MSHV) pub(crate) struct HypervLinuxDriver { _mshv: Mshv, + page_size: usize, vm_fd: VmFd, vcpu_fd: VcpuFd, entrypoint: u64, @@ -424,6 +425,7 @@ impl HypervLinuxDriver { #[allow(unused_mut)] let mut hv = Self { _mshv: mshv, + page_size: 0, vm_fd, vcpu_fd, mem_regions, @@ -525,6 +527,8 @@ impl Hypervisor for HypervLinuxDriver { max_guest_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()> { + self.page_size = page_size as usize; + let max_guest_log_level: u64 = match max_guest_log_level { Some(level) => level as u64, None => self.get_max_log_level().into(), @@ -556,6 +560,37 @@ impl Hypervisor for HypervLinuxDriver { Ok(()) } + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + if [ + 
rgn.guest_region.start, + rgn.guest_region.end, + rgn.host_region.start, + rgn.host_region.end, + ] + .iter() + .any(|x| x % self.page_size != 0) + { + log_then_return!("region is not page-aligned"); + } + let mshv_region: mshv_user_mem_region = rgn.to_owned().into(); + self.vm_fd.map_user_memory(mshv_region)?; + self.mem_regions.push(rgn.to_owned()); + Ok(()) + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> { + for rgn in self + .mem_regions + .split_off(self.mem_regions.len() - n as usize) + { + let mshv_region: mshv_user_mem_region = rgn.to_owned().into(); + self.vm_fd.unmap_user_memory(mshv_region)?; + } + Ok(()) + } + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn dispatch_call_from_host( &mut self, diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 288b5bf5b..cd0398854 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -36,8 +36,8 @@ use { DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, HypervDebug, VcpuStopReason, }, super::handlers::DbgMemAccessHandlerWrapper, + crate::HyperlightError, crate::hypervisor::handlers::DbgMemAccessHandlerCaller, - crate::{HyperlightError, log_then_return}, std::sync::Mutex, }; @@ -59,7 +59,7 @@ use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::{GuestPtr, RawPtr}; #[cfg(crashdump)] use crate::sandbox::uninitialized::SandboxRuntimeConfig; -use crate::{Result, debug, new_error}; +use crate::{Result, debug, log_then_return, new_error}; #[cfg(gdb)] mod debug { @@ -606,6 +606,21 @@ impl Hypervisor for HypervWindowsDriver { Ok(()) } + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + unsafe fn map_region(&mut self, _rgn: &MemoryRegion) -> Result<()> { + log_then_return!("Mapping 
host memory into the guest not yet supported on this platform"); + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> { + if n > 0 { + log_then_return!( + "Mapping host memory into the guest not yet supported on this platform" + ); + } + Ok(()) + } + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn dispatch_call_from_host( &mut self, diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index d85a6a838..3da9786cd 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -493,6 +493,21 @@ impl Hypervisor for KVMDriver { Ok(()) } + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + unsafe fn map_region(&mut self, _rgn: &MemoryRegion) -> Result<()> { + log_then_return!("Mapping host memory into the guest not yet supported on this platform"); + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> { + if n > 0 { + log_then_return!( + "Mapping host memory into the guest not yet supported on this platform" + ); + } + Ok(()) + } + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn dispatch_call_from_host( &mut self, diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 0a31ee468..ecf6acbc5 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -132,6 +132,15 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()>; + /// Map a region of host memory into the sandbox. + /// + /// Depending on the host platform, there are likely alignment + /// requirements of at least one page for base and len. 
+ unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()>; + + /// Unmap the most recent `n` regions mapped by `map_region` + unsafe fn unmap_regions(&mut self, n: u64) -> Result<()>; + /// Dispatch a call from the host to the guest using the given pointer /// to the dispatch function _in the guest's address space_. /// diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 7910d9dc2..90cb76573 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -73,6 +73,8 @@ pub(crate) struct SandboxMemoryManager { pub(crate) load_addr: RawPtr, /// Offset for the execution entrypoint from `load_addr` pub(crate) entrypoint_offset: Offset, + /// How many memory regions were mapped after sandbox creation + pub(crate) mapped_rgns: u64, /// A vector of memory snapshots that can be used to save and restore the state of the memory /// This is used by the Rust Sandbox implementation (rather than the mem_snapshot field above which only exists to support current C API) snapshots: Arc>>, @@ -95,6 +97,7 @@ where shared_mem, load_addr, entrypoint_offset, + mapped_rgns: 0, snapshots: Arc::new(Mutex::new(Vec::new())), } } @@ -265,7 +268,7 @@ where /// this function will create a memory snapshot and push it onto the stack of snapshots /// It should be used when you want to save the state of the memory, for example, when evolving a sandbox to a new state pub(crate) fn push_state(&mut self) -> Result<()> { - let snapshot = SharedMemorySnapshot::new(&mut self.shared_mem)?; + let snapshot = SharedMemorySnapshot::new(&mut self.shared_mem, self.mapped_rgns)?; self.snapshots .try_lock() .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))? 
@@ -277,7 +280,11 @@ where /// off the stack /// It should be used when you want to restore the state of the memory to a previous state but still want to /// retain that state, for example after calling a function in the guest - pub(crate) fn restore_state_from_last_snapshot(&mut self) -> Result<()> { + /// + /// Returns the number of memory regions mapped into the sandbox + /// that need to be unmapped in order for the restore to be + /// completed. + pub(crate) fn restore_state_from_last_snapshot(&mut self) -> Result { let mut snapshots = self .snapshots .try_lock() @@ -288,13 +295,15 @@ where } #[allow(clippy::unwrap_used)] // We know that last is not None because we checked it above let snapshot = last.unwrap(); - snapshot.restore_from_snapshot(&mut self.shared_mem) + let old_rgns = self.mapped_rgns; + self.mapped_rgns = snapshot.restore_from_snapshot(&mut self.shared_mem)?; + Ok(old_rgns - self.mapped_rgns) } /// this function pops the last snapshot off the stack and restores the memory to the previous state /// It should be used when you want to restore the state of the memory to a previous state and do not need to retain that state /// for example when devolving a sandbox to a previous state. 
- pub(crate) fn pop_and_restore_state_from_snapshot(&mut self) -> Result<()> { + pub(crate) fn pop_and_restore_state_from_snapshot(&mut self) -> Result { let last = self .snapshots .try_lock() @@ -430,6 +439,7 @@ impl SandboxMemoryManager { layout: self.layout, load_addr: self.load_addr.clone(), entrypoint_offset: self.entrypoint_offset, + mapped_rgns: 0, snapshots: Arc::new(Mutex::new(Vec::new())), }, SandboxMemoryManager { @@ -437,6 +447,7 @@ impl SandboxMemoryManager { layout: self.layout, load_addr: self.load_addr.clone(), entrypoint_offset: self.entrypoint_offset, + mapped_rgns: 0, snapshots: Arc::new(Mutex::new(Vec::new())), }, ) diff --git a/src/hyperlight_host/src/mem/shared_mem_snapshot.rs b/src/hyperlight_host/src/mem/shared_mem_snapshot.rs index d5cf565de..ac2bdc6b5 100644 --- a/src/hyperlight_host/src/mem/shared_mem_snapshot.rs +++ b/src/hyperlight_host/src/mem/shared_mem_snapshot.rs @@ -24,16 +24,21 @@ use crate::Result; #[derive(Clone)] pub(super) struct SharedMemorySnapshot { snapshot: Vec, + /// How many non-main-RAM regions were mapped when this snapshot was taken? + mapped_rgns: u64, } impl SharedMemorySnapshot { /// Take a snapshot of the memory in `shared_mem`, then create a new /// instance of `Self` with the snapshot stored therein. #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn new(shared_mem: &mut S) -> Result { + pub(super) fn new(shared_mem: &mut S, mapped_rgns: u64) -> Result { // TODO: Track dirty pages instead of copying entire memory let snapshot = shared_mem.with_exclusivity(|e| e.copy_all_to_vec())??; - Ok(Self { snapshot }) + Ok(Self { + snapshot, + mapped_rgns, + }) } /// Take another snapshot of the internally-stored `SharedMemory`, @@ -51,8 +56,9 @@ impl SharedMemorySnapshot { pub(super) fn restore_from_snapshot( &mut self, shared_mem: &mut S, - ) -> Result<()> { - shared_mem.with_exclusivity(|e| e.copy_from_slice(self.snapshot.as_slice(), 0))? 
+ ) -> Result { + shared_mem.with_exclusivity(|e| e.copy_from_slice(self.snapshot.as_slice(), 0))??; + Ok(self.mapped_rgns) } } @@ -69,7 +75,7 @@ mod tests { let data2 = data1.iter().map(|b| b + 1).collect::>(); let mut gm = ExclusiveSharedMemory::new(PAGE_SIZE_USIZE).unwrap(); gm.copy_from_slice(data1.as_slice(), 0).unwrap(); - let mut snap = super::SharedMemorySnapshot::new(&mut gm).unwrap(); + let mut snap = super::SharedMemorySnapshot::new(&mut gm, 0).unwrap(); { // after the first snapshot is taken, make sure gm has the equivalent // of data1 diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 8d1e27918..8df9d08ef 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -14,6 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ +#[cfg(unix)] +use std::os::fd::AsRawFd; +#[cfg(unix)] +use std::os::linux::fs::MetadataExt; +use std::path::Path; use std::sync::{Arc, Mutex}; use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; @@ -31,12 +36,15 @@ use crate::func::{ParameterTuple, SupportedReturnType}; use crate::hypervisor::handlers::DbgMemAccessHandlerWrapper; use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller}; use crate::hypervisor::{Hypervisor, InterruptHandle}; +#[cfg(unix)] +use crate::mem::memory_region::MemoryRegionType; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; use crate::metrics::maybe_time_and_emit_guest_call; use crate::sandbox_state::sandbox::{DevolvableSandbox, EvolvableSandbox, Sandbox}; use crate::sandbox_state::transition::{MultiUseContextCallback, Noop}; -use crate::{HyperlightError, Result}; +use crate::{HyperlightError, Result, log_then_return}; /// A sandbox that 
supports being used Multiple times. /// The implication of being used multiple times is two-fold: @@ -173,6 +181,75 @@ impl MultiUseSandbox { }) } + /// Map a region of host memory into the sandbox. + /// + /// Depending on the host platform, there are likely alignment + /// requirements of at least one page for base and len. + /// + /// `rgn.region_type` is ignored, since guest PTEs are not created + /// for the new memory. + /// + /// It is the caller's responsibility to ensure that the host side + /// of the region remains intact and is not written to until this + /// mapping is removed, either due to the destruction of the + /// sandbox or due to a state rollback + #[instrument(err(Debug), skip(self, rgn), parent = Span::current())] + pub unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + if rgn.flags.contains(MemoryRegionFlags::STACK_GUARD) { + // Stack guard pages are an internal implementation detail + // (which really should be moved into the guest) + log_then_return!("Cannot map host memory as a stack guard page"); + } + if rgn.flags.contains(MemoryRegionFlags::WRITE) { + // TODO: Implement support for writable mappings, which + // need to be registered with the memory manager so that + // writes can be rolled back when necessary. 
+ log_then_return!("TODO: Writable mappings not yet supported"); + } + unsafe { self.vm.map_region(rgn) }?; + self.mem_mgr.unwrap_mgr_mut().mapped_rgns += 1; + Ok(()) + } + + /// Map the contents of a file into the guest at a particular address + /// + /// Returns the length of the mapping + #[instrument(err(Debug), skip(self, _fp, _guest_base), parent = Span::current())] + pub(crate) fn map_file_cow(&mut self, _fp: &Path, _guest_base: u64) -> Result { + #[cfg(windows)] + log_then_return!("mmap'ing a file into the guest is not yet supported on Windows"); + #[cfg(unix)] + unsafe { + let file = std::fs::File::options().read(true).write(true).open(_fp)?; + let file_size = file.metadata()?.st_size(); + let page_size = page_size::get(); + let size = (file_size as usize).div_ceil(page_size) * page_size; + let base = libc::mmap( + std::ptr::null_mut(), + size, + libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC, + libc::MAP_PRIVATE, + file.as_raw_fd(), + 0, + ); + if base == libc::MAP_FAILED { + log_then_return!("mmap error: {:?}", std::io::Error::last_os_error()); + } + + if let Err(err) = self.map_region(&MemoryRegion { + host_region: base as usize..base.wrapping_add(size) as usize, + guest_region: _guest_base as usize.._guest_base as usize + size, + flags: MemoryRegionFlags::READ | MemoryRegionFlags::EXECUTE, + region_type: MemoryRegionType::Heap, + }) { + libc::munmap(base, size); + return Err(err); + }; + + Ok(size as u64) + } + } + /// This function is kept here for fuzz testing the parameter and return types #[cfg(feature = "fuzzing")] #[instrument(err(Debug), skip(self, args), parent = Span::current())] @@ -193,7 +270,9 @@ impl MultiUseSandbox { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn restore_state(&mut self) -> Result<()> { let mem_mgr = self.mem_mgr.unwrap_mgr_mut(); - mem_mgr.restore_state_from_last_snapshot() + let rgns_to_unmap = mem_mgr.restore_state_from_last_snapshot()?; + unsafe { 
self.vm.unmap_regions(rgns_to_unmap)? }; + Ok(()) } pub(crate) fn call_guest_function_by_name_no_reset( @@ -275,9 +354,11 @@ impl DevolvableSandbox) -> Result { - self.mem_mgr + let rgns_to_unmap = self + .mem_mgr .unwrap_mgr_mut() .pop_and_restore_state_from_snapshot()?; + unsafe { self.vm.unmap_regions(rgns_to_unmap)? }; Ok(self) } } From 63ba9a3672734c053890eeae06da67b7e69df86c Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 8 Jul 2025 01:12:31 +0000 Subject: [PATCH 3/7] [hyperlight_guest] Add basic page table modification functions Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- .../src/guest_function/call.rs | 11 + src/hyperlight_guest_bin/src/lib.rs | 1 + src/hyperlight_guest_bin/src/paging.rs | 252 ++++++++++++++++++ 3 files changed, 264 insertions(+) create mode 100644 src/hyperlight_guest_bin/src/paging.rs diff --git a/src/hyperlight_guest_bin/src/guest_function/call.rs b/src/hyperlight_guest_bin/src/guest_function/call.rs index bdaed4212..d829e2a85 100644 --- a/src/hyperlight_guest_bin/src/guest_function/call.rs +++ b/src/hyperlight_guest_bin/src/guest_function/call.rs @@ -100,6 +100,17 @@ fn internal_dispatch_function() -> Result<()> { // which if it were included in the internal_dispatch_function cause the epilogue to not be called because the halt() would not return // when running in the hypervisor. pub(crate) extern "C" fn dispatch_function() { + // The hyperlight host likes to use one partition and reset it in + // various ways; if that has happened, there might be stale TLB + // entries hanging around from the former user of the + // partition.
Flushing the TLB here is not quite the right thing + // to do, since incorrectly cached entries could make even this + // code not exist, but regrettably there is not a simple way for + // the host to trigger flushing when it ought to happen, so for + // now this works in practice, since the text segment is always + // part of the big identity-mapped region at the base of the + // guest. + crate::paging::flush_tlb(); let _ = internal_dispatch_function(); halt(); } diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs index 473bfbfc1..c3d44f4f5 100644 --- a/src/hyperlight_guest_bin/src/lib.rs +++ b/src/hyperlight_guest_bin/src/lib.rs @@ -52,6 +52,7 @@ pub mod guest_function { pub mod guest_logger; pub mod host_comm; pub mod memory; +pub mod paging; // === Globals === #[global_allocator] diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs new file mode 100644 index 000000000..3d824e680 --- /dev/null +++ b/src/hyperlight_guest_bin/src/paging.rs @@ -0,0 +1,252 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use alloc::alloc::Layout; +use core::arch::asm; + +use crate::OS_PAGE_SIZE; + +/// Convert a physical address in main memory to a virtual address +/// through the physmap +/// +/// This is _not guaranteed_ to work with device memory +pub fn ptov(x: u64) -> *mut u8 { + // Currently, all of main memory is identity mapped + x as *mut u8 +} + +// TODO: This is not at all thread-safe atm +// TODO: A lot of code in this file uses inline assembly to load and +// store page table entries. It would be nice to use pointer +// volatile read/writes instead, but unfortunately we have a PTE +// at physical address 0, which is currently identity-mapped at +// virtual address 0, and Rust raw pointer operations can't be +// used to read/write from address 0. + +/// A helper structure indicating a mapping operation that needs to be +/// performed +struct MapRequest { + table_base: u64, + vmin: *mut u8, + len: u64, +} + +/// A helper structure indicating that a particular PTE needs to be +/// modified +struct MapResponse { + entry_ptr: *mut u64, + vmin: *mut u8, + len: u64, +} + +/// Assumption: all are page-aligned +pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64) { + let mut pml4_base: u64 = 0; + unsafe { + asm!("mov {}, cr3", out(reg) pml4_base); + } + pml4_base &= !0xfff; + modify_ptes::<47, 39>(MapRequest { + table_base: pml4_base, + vmin: virt_base, + len, + }) + .map(|r| unsafe { alloc_pte_if_needed(r) }) + .flat_map(modify_ptes::<38, 30>) + .map(|r| unsafe { alloc_pte_if_needed(r) }) + .flat_map(modify_ptes::<29, 21>) + .map(|r| unsafe { alloc_pte_if_needed(r) }) + .flat_map(modify_ptes::<20, 12>) + .map(|r| map_normal(phys_base, virt_base, r)) + .collect::<()>(); +} + +#[allow(unused)] +/// This function is not presently used for anything, but is useful +/// for debugging +pub unsafe fn dbg_print_address_pte(address: u64) -> u64 { + let mut pml4_base: u64 = 0; + unsafe { + asm!("mov {}, cr3", out(reg) pml4_base); + } + pml4_base &= !0xfff; + let
addrs = modify_ptes::<47, 39>(MapRequest { + table_base: pml4_base, + vmin: address as *mut u8, + len: unsafe { OS_PAGE_SIZE as u64 }, + }) + .map(|r| unsafe { require_pte_exist(r) }) + .flat_map(modify_ptes::<38, 30>) + .map(|r| unsafe { require_pte_exist(r) }) + .flat_map(modify_ptes::<29, 21>) + .map(|r| unsafe { require_pte_exist(r) }) + .flat_map(modify_ptes::<20, 12>) + .map(|r| { + let mut pte: u64 = 0; + unsafe { + asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) r.entry_ptr); + } + pte + }) + .collect::>(); + if addrs.len() != 1 { + panic!("impossible: 1 page map request resolved to multiple PTEs"); + } + return addrs[0]; +} + +/// Allocate n contiguous physical pages and return the physical +/// addresses of the pages in question. +pub unsafe fn alloc_phys_pages(n: u64) -> u64 { + // Currently, since all of main memory is idmap'd, we can just + // allocate any appropriately aligned section of memory. + unsafe { + let v = alloc::alloc::alloc_zeroed( + Layout::from_size_align(n as usize * OS_PAGE_SIZE as usize, OS_PAGE_SIZE as usize) + .expect("could not create physical page allocation layout"), + ); + if v.is_null() { + panic!("could not allocate a physical page"); + } + v as u64 + } +} + +pub unsafe fn require_pte_exist(x: MapResponse) -> MapRequest { + let mut pte: u64 = 0; + unsafe { + asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) x.entry_ptr); + } + let present = pte & 0x1; + if present == 0 { + panic!("debugging: found not-present pte"); + } + MapRequest { + table_base: pte & !0xfff, + vmin: x.vmin, + len: x.len, + } +} + +/// Page-mapping callback to allocate a next-level page table if necessary +pub unsafe fn alloc_pte_if_needed(x: MapResponse) -> MapRequest { + let mut pte: u64 = 0; + unsafe { + asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) x.entry_ptr); + } + let present = pte & 0x1; + if present != 0 { + return MapRequest { + table_base: pte & !0xfff, + vmin: x.vmin, + len: x.len, + }; + } + let page_addr = unsafe { 
alloc_phys_pages(1) }; + unsafe { ptov(page_addr).write_bytes(0u8, OS_PAGE_SIZE as usize) }; + let pte = page_addr | + 1 << 5 | // A - we don't track accesses at table level + 0 << 4 | // PCD - leave caching enabled + 0 << 3 | // PWT - write-back + 1 << 2 | // U/S - allow user access to everything (for now) + 1 << 1 | // R/W - we don't use block-level permissions + 1 << 0; // P - this entry is present + unsafe { + asm!("mov qword ptr [{}], {}", in(reg) x.entry_ptr, in(reg) pte); + } + MapRequest { + table_base: page_addr, + vmin: x.vmin, + len: x.len, + } +} + +/// Map a normal memory page +/// +/// TODO: support permissions; currently mapping is always RWX +fn map_normal(phys_base: u64, virt_base: *mut u8, r: MapResponse) { + let pte = (phys_base + (r.vmin as u64 - virt_base as u64)) | + 1 << 6 | // D - we don't presently track dirty state for anything + 1 << 5 | // A - we don't presently track access for anything + 0 << 4 | // PCD - leave caching enabled + 0 << 3 | // PWT - write-back + 1 << 2 | // U/S - allow user access to everything (for now) + 1 << 1 | // R/W - for now make everything r/w + 1 << 0; // P - this entry is present + unsafe { + r.entry_ptr.write_volatile(pte); + } +} + +#[inline(always)] +/// Utility function to extract an (inclusive on both ends) bit range +/// from a quadword. 
+fn bits(x: u64) -> u64 { + (x & ((1 << (high_bit + 1)) - 1)) >> low_bit +} + +struct ModifyPteIterator { + request: MapRequest, + n: u64, +} +impl Iterator for ModifyPteIterator { + type Item = MapResponse; + fn next(&mut self) -> Option { + if (self.n << low_bit) >= self.request.len { + return None; + } + // next stage parameters + let next_vmin = self.request.vmin.wrapping_add((self.n << low_bit) as usize); + let entry_ptr = ptov(self.request.table_base) + .wrapping_add((bits::(next_vmin as u64) << 3) as usize) + as *mut u64; + let len_from_here = self.request.len - (self.n << low_bit); + let next_len = core::cmp::min(len_from_here, 1 << low_bit); + + // update our state + self.n += 1; + + Some(MapResponse { + entry_ptr, + vmin: next_vmin, + len: next_len, + }) + } +} +fn modify_ptes( + r: MapRequest, +) -> ModifyPteIterator { + ModifyPteIterator { request: r, n: 0 } +} + +pub fn flush_tlb() { + // Currently this just always flips CR4.PGE back and forth to + // trigger a tlb flush. We should use a faster approach where + // available + let mut orig_cr4: u64 = 0; + unsafe { + asm!("mov {}, cr4", out(reg) orig_cr4); + } + let tmp_cr4: u64 = orig_cr4 ^ (1 << 7); // CR4.PGE + unsafe { + asm!( + "mov cr4, {}", + "mov cr4, {}", + in(reg) tmp_cr4, + in(reg) orig_cr4 + ); + } +} From 6295bec1cca5f5736f0ff24779a9296ede9a91ba Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Mon, 7 Jul 2025 22:15:48 +0000 Subject: [PATCH 4/7] [hyperlight_guest_bin] Disable red zones when compiling C Because we don't presently switch stacks on interrupt entry, we cannot support a red zone in the ABI. rustc's x86_64-unknown-none target already does not use a redzone, but clang's x86_64-unknown-none-linux target, which we use for compiling C code, does use a red zone by default. This commit modifies the clang options that we use to remove uses of a red zone from generated code. 
Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_guest_bin/build.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/hyperlight_guest_bin/build.rs b/src/hyperlight_guest_bin/build.rs index 35a39b469..dd8d76079 100644 --- a/src/hyperlight_guest_bin/build.rs +++ b/src/hyperlight_guest_bin/build.rs @@ -88,6 +88,10 @@ fn cargo_main() { // targets will eventually show up. cfg.flag("--target=x86_64-unknown-linux-none"); + // We don't use a different stack for all interrupts, so there + // can be no red zone + cfg.flag("-mno-red-zone"); + // We don't support stack protectors at the moment, but Arch Linux clang // auto-enables them for -linux platforms, so explicitly disable them. cfg.flag("-fno-stack-protector"); @@ -245,6 +249,7 @@ fn main() -> std::process::ExitCode { "-fno-stack-protector", "-fstack-clash-protection", "-mstack-probe-size=4096", + "-mno-red-zone", ]) .arg("-nostdinc") .arg("-isystem") From b46da1222eaccc1cc359cd4cf55c9537cc59be09 Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:47:06 +0000 Subject: [PATCH 5/7] [hyperlight_guest_bin] Pop error code on context restore Previously, we did not remove the error code from the stack before `iret`'ing, which resulted in the return failing. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs b/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs index 0a0d63775..f29aaa1e8 100644 --- a/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs +++ b/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs @@ -123,7 +123,8 @@ macro_rules!
generate_exceptions { " mov rdi, rsp\n", " call {hl_exception_handler}\n", context_restore!(), - " iretq\n", // iretq is used to return from exception in x86_64 + " add rsp, 8\n", // error code + " iretq\n", // iretq is used to return from exception in x86_64 generate_excp!(0, pusherrcode), generate_excp!(1, pusherrcode), generate_excp!(2, pusherrcode), From f17f3a46742b21c57f1cf15eb9ecebac1bebd5dc Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:49:16 +0000 Subject: [PATCH 6/7] [hyperlight_guest_bin] Save/restore x87 & SSE regs on context switch We unfortunately don't have a good way of making sure that code run in exception contexts doesn't use floating point and SSE instructions. This commit simply always saves and restores the relevant registers as a workaround. We should at some point look into the alternative of ensuring that simd instructions aren't used in code reachable from an exception handler, and see if that actually improves performance. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- .../src/exceptions/interrupt_entry.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs b/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs index f29aaa1e8..bbfdd96fa 100644 --- a/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs +++ b/src/hyperlight_guest_bin/src/exceptions/interrupt_entry.rs @@ -67,9 +67,19 @@ macro_rules! context_save { " push r13\n", " push r14\n", " push r15\n", - // Save segment registers + // Save one of the segment registers to get 16-byte alignment for + // FXSAVE. 
TODO: consider packing the segment registers " mov rax, ds\n", " push rax\n", + // Save floating-point/SSE registers + // TODO: Don't do this unconditionally: get the exn + // handlers compiled without sse + // TODO: Check if we ever generate code with ymm/zmm in + // the handlers and save/restore those as well + " sub rsp, 512\n", + " mov rax, rsp\n", + " fxsave [rax]\n", + // Save the rest of the segment registers " mov rax, es\n", " push rax\n", " mov rax, fs\n", @@ -83,13 +93,18 @@ macro_rules! context_save { macro_rules! context_restore { () => { concat!( - // Restore segment registers + // Restore most segment registers " pop rax\n", " mov gs, rax\n", " pop rax\n", " mov fs, rax\n", " pop rax\n", " mov es, rax\n", + // Restore floating-point/SSE registers + " mov rax, rsp\n", + " fxrstor [rax]\n", + " add rsp, 512\n", + // Restore the last segment register " pop rax\n", " mov ds, rax\n", // Restore general-purpose registers From ea6fa8f16dae2325d94af39eb6ac3b441b24dcac Mon Sep 17 00:00:00 2001 From: Lucy Menon <168595099+syntactically@users.noreply.github.com> Date: Tue, 8 Jul 2025 17:23:20 +0100 Subject: [PATCH 7/7] [hyperlight_host] exceptions: allow guests to register handlers This commit adds a simple mechanism for guests to register exception handlers. It does not support any kind of chaining, so there can only be one exception handler per exception per guest, which should probably be improved in the future. This will be used in hyperlight-wasm shortly. 
Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com> --- .../src/exceptions/handler.rs | 75 ++++++++++++++++++- src/hyperlight_guest_bin/src/lib.rs | 4 +- 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/hyperlight_guest_bin/src/exceptions/handler.rs b/src/hyperlight_guest_bin/src/exceptions/handler.rs index 5bc1a7e09..e2072bf1f 100644 --- a/src/hyperlight_guest_bin/src/exceptions/handler.rs +++ b/src/hyperlight_guest_bin/src/exceptions/handler.rs @@ -21,6 +21,45 @@ use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::outb::Exception; use hyperlight_guest::exit::abort_with_code_and_message; +use crate::paging; + +/// See AMD64 Architecture Programmer's Manual, Volume 2 +/// ยง8.9.3 Interrupt Stack Frame, pp. 283--284 +/// Figure 8-14: Long-Mode Stack After Interrupt---Same Privilege, +/// Figure 8-15: Long-Mode Stack After Interrupt---Higher Privilege +/// Subject to the proviso that we push a dummy error code of 0 for exceptions +/// for which the processor does not provide one +#[repr(C)] +pub struct ExceptionInfo { + pub error_code: u64, + pub rip: u64, + pub cs: u64, + pub rflags: u64, + pub rsp: u64, + pub ss: u64, +} +const _: () = assert!(core::mem::offset_of!(ExceptionInfo, rip) == 8); +const _: () = assert!(core::mem::offset_of!(ExceptionInfo, rsp) == 32); + +#[repr(C)] +/// Saved context, pushed onto the stack by exception entry code +pub struct Context { + /// in order: gs, fs, es + pub segments: [u64; 3], + pub fxsave: [u8; 512], + pub ds: u64, + /// no `rsp`, since the processor saved it + /// `rax` is at the top, `r15` the bottom + pub gprs: [u64; 15], +} +const _: () = assert!(size_of::() == 152 + 512); + +// TODO: This will eventually need to end up in a per-thread context, +// when there are threads. 
+pub static handlers: [core::sync::atomic::AtomicU64; 31] = + [const { core::sync::atomic::AtomicU64::new(0) }; 31]; +type handler_t = fn(n: u64, info: *mut ExceptionInfo, ctx: *mut Context, pf_addr: u64) -> bool; + /// Exception handler #[unsafe(no_mangle)] pub extern "C" fn hl_exception_handler( @@ -28,13 +67,43 @@ pub extern "C" fn hl_exception_handler( exception_number: u64, page_fault_address: u64, ) { + let ctx = stack_pointer as *mut Context; + let exn_info = (stack_pointer + size_of::() as u64) as *mut ExceptionInfo; + let exception = Exception::try_from(exception_number as u8).expect("Invalid exception number"); + + let saved_rip = unsafe { (&raw const (*exn_info).rip).read_volatile() }; + let error_code = unsafe { (&raw const (*exn_info).error_code).read_volatile() }; + let msg = format!( - "Page Fault Address: {:#x}\n\ - Stack Pointer: {:#x}", - page_fault_address, stack_pointer + "Exception vector: {:#}\n\ + Faulting Instruction: {:#x}\n\ + Page Fault Address: {:#x}\n\ + Error code: {:#x}\n\ + Stack Pointer: {:#x}", + exception_number, saved_rip, page_fault_address, error_code, stack_pointer ); + // We don't presently have any need for user-defined interrupts, + // so we only support handlers for the architecture-defined + // vectors (0-31) + if exception_number < 31 { + let handler = + handlers[exception_number as usize].load(core::sync::atomic::Ordering::Acquire); + if handler != 0 + && unsafe { + core::mem::transmute::<_, handler_t>(handler)( + exception_number, + exn_info, + ctx, + page_fault_address, + ) + } + { + return; + } + } + unsafe { abort_with_code_and_message( &[ErrorCode::GuestError as u8, exception as u8], diff --git a/src/hyperlight_guest_bin/src/lib.rs b/src/hyperlight_guest_bin/src/lib.rs index c3d44f4f5..f48a196d6 100644 --- a/src/hyperlight_guest_bin/src/lib.rs +++ b/src/hyperlight_guest_bin/src/lib.rs @@ -35,9 +35,9 @@ use spin::Once; // === Modules === #[cfg(target_arch = "x86_64")] -mod exceptions { +pub mod exceptions { 
pub(super) mod gdt; - mod handler; + pub mod handler; mod idt; pub(super) mod idtr; mod interrupt_entry;